Add environment configuration file, restructure tool imports, and enhance README setup instructions
This commit is contained in:
parent
c5386ed7e6
commit
0411ca1880
11 changed files with 178 additions and 12 deletions
67
tools/__init__.py
Normal file
67
tools/__init__.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tools Package
|
||||
|
||||
This package contains all the specific tool implementations for the Hermes Agent.
|
||||
Each module provides specialized functionality for different capabilities:
|
||||
|
||||
- web_tools: Web search, content extraction, and crawling
|
||||
- terminal_tool: Command execution on virtual machines
|
||||
- vision_tools: Image analysis and understanding
|
||||
- mixture_of_agents_tool: Multi-model collaborative reasoning
|
||||
- image_generation_tool: Text-to-image generation with upscaling
|
||||
|
||||
The tools are imported into model_tools.py which provides a unified interface
|
||||
for the AI agent to access all capabilities.
|
||||
"""
|
||||
|
||||
# Export all tools for easy importing
|
||||
from .web_tools import (
|
||||
web_search_tool,
|
||||
web_extract_tool,
|
||||
web_crawl_tool,
|
||||
check_firecrawl_api_key
|
||||
)
|
||||
|
||||
from .terminal_tool import (
|
||||
terminal_tool,
|
||||
check_hecate_requirements,
|
||||
TERMINAL_TOOL_DESCRIPTION
|
||||
)
|
||||
|
||||
from .vision_tools import (
|
||||
vision_analyze_tool,
|
||||
check_vision_requirements
|
||||
)
|
||||
|
||||
from .mixture_of_agents_tool import (
|
||||
mixture_of_agents_tool,
|
||||
check_moa_requirements
|
||||
)
|
||||
|
||||
from .image_generation_tool import (
|
||||
image_generate_tool,
|
||||
check_image_generation_requirements
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Web tools
|
||||
'web_search_tool',
|
||||
'web_extract_tool',
|
||||
'web_crawl_tool',
|
||||
'check_firecrawl_api_key',
|
||||
# Terminal tools
|
||||
'terminal_tool',
|
||||
'check_hecate_requirements',
|
||||
'TERMINAL_TOOL_DESCRIPTION',
|
||||
# Vision tools
|
||||
'vision_analyze_tool',
|
||||
'check_vision_requirements',
|
||||
# MoA tools
|
||||
'mixture_of_agents_tool',
|
||||
'check_moa_requirements',
|
||||
# Image generation tools
|
||||
'image_generate_tool',
|
||||
'check_image_generation_requirements',
|
||||
]
|
||||
|
||||
566
tools/image_generation_tool.py
Normal file
566
tools/image_generation_tool.py
Normal file
|
|
@ -0,0 +1,566 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Image Generation Tools Module
|
||||
|
||||
This module provides image generation tools using FAL.ai's FLUX.1 Krea model with
|
||||
automatic upscaling via FAL.ai's Clarity Upscaler for enhanced image quality.
|
||||
|
||||
Available tools:
|
||||
- image_generate_tool: Generate images from text prompts with automatic upscaling
|
||||
|
||||
Features:
|
||||
- High-quality image generation using FLUX.1 Krea model
|
||||
- Automatic 2x upscaling using Clarity Upscaler for enhanced quality
|
||||
- Comprehensive parameter control (size, steps, guidance, etc.)
|
||||
- Proper error handling and validation with fallback to original images
|
||||
- Debug logging support
|
||||
- Sync mode for immediate results
|
||||
|
||||
Usage:
|
||||
from image_generation_tool import image_generate_tool
|
||||
import asyncio
|
||||
|
||||
# Generate and automatically upscale an image
|
||||
result = await image_generate_tool(
|
||||
prompt="A serene mountain landscape with cherry blossoms",
|
||||
image_size="landscape_4_3",
|
||||
num_images=1
|
||||
)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import asyncio
|
||||
import uuid
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, Union
|
||||
import fal_client
|
||||
|
||||
# --- Image generation defaults (sent to the FLUX Krea endpoint) ---
DEFAULT_MODEL = "fal-ai/flux/krea"
DEFAULT_IMAGE_SIZE = "landscape_4_3"
DEFAULT_NUM_INFERENCE_STEPS = 50
DEFAULT_GUIDANCE_SCALE = 4.5
DEFAULT_NUM_IMAGES = 1
DEFAULT_OUTPUT_FORMAT = "png"

# --- Automatic upscaling settings (sent to the Clarity Upscaler endpoint) ---
UPSCALER_MODEL = "fal-ai/clarity-upscaler"
UPSCALER_FACTOR = 2
UPSCALER_SAFETY_CHECKER = False
UPSCALER_DEFAULT_PROMPT = "masterpiece, best quality, highres"
UPSCALER_NEGATIVE_PROMPT = "(worst quality, low quality, normal quality:2)"
UPSCALER_CREATIVITY = 0.35
UPSCALER_RESEMBLANCE = 0.6
UPSCALER_GUIDANCE_SCALE = 4
UPSCALER_NUM_INFERENCE_STEPS = 18

# --- Accepted values used by parameter validation ---
# These stay lists because validation error messages embed their repr.
VALID_IMAGE_SIZES = [
    "square_hd",
    "square",
    "portrait_4_3",
    "portrait_16_9",
    "landscape_4_3",
    "landscape_16_9",
]
VALID_OUTPUT_FORMATS = ["jpeg", "png"]
VALID_ACCELERATION_MODES = ["none", "regular", "high"]
|
||||
|
||||
# Debug logging is opt-in: it is active only when IMAGE_TOOLS_DEBUG equals
# "true" (case-insensitive). A fresh session id is generated on every import.
DEBUG_MODE = os.getenv("IMAGE_TOOLS_DEBUG", "false").lower() == "true"
DEBUG_SESSION_ID = str(uuid.uuid4())
DEBUG_LOG_PATH = Path("./logs")

if DEBUG_MODE:
    # In-memory record of this session; tool calls are appended as they happen.
    DEBUG_DATA = {
        "session_id": DEBUG_SESSION_ID,
        "start_time": datetime.datetime.now().isoformat(),
        "debug_enabled": DEBUG_MODE,
        "tool_calls": []
    }
    # Make sure the log directory exists before anything tries to write to it.
    DEBUG_LOG_PATH.mkdir(exist_ok=True)
    print(f"🐛 Image generation debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
else:
    DEBUG_DATA = None
|
||||
|
||||
|
||||
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
    """
    Append one tool-call record to the in-memory debug session.

    Does nothing unless debug mode is enabled and the session data exists.

    Args:
        tool_name (str): Name of the tool that was invoked
        call_data (Dict[str, Any]): Extra fields merged into the record
            (keys in call_data override the generated ones)
    """
    if DEBUG_MODE and DEBUG_DATA:
        record = {
            "timestamp": datetime.datetime.now().isoformat(),
            "tool_name": tool_name,
        }
        record.update(call_data)
        DEBUG_DATA["tool_calls"].append(record)
|
||||
|
||||
|
||||
def _save_debug_log() -> None:
    """
    Write the debug session data as JSON into the logs directory.

    Does nothing unless debug mode is enabled. Any failure while writing is
    reported on stdout and otherwise ignored (logging is best-effort).
    """
    if not (DEBUG_MODE and DEBUG_DATA):
        return

    try:
        target = DEBUG_LOG_PATH / f"image_tools_debug_{DEBUG_SESSION_ID}.json"

        # Stamp the session with its current end time and call count.
        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])

        with target.open('w', encoding='utf-8') as log_file:
            json.dump(DEBUG_DATA, log_file, indent=2, ensure_ascii=False)

        print(f"🐛 Image generation debug log saved: {target}")

    except Exception as exc:
        print(f"❌ Error saving image generation debug log: {exc}")
|
||||
|
||||
|
||||
def _validate_parameters(
|
||||
image_size: Union[str, Dict[str, int]],
|
||||
num_inference_steps: int,
|
||||
guidance_scale: float,
|
||||
num_images: int,
|
||||
output_format: str,
|
||||
acceleration: str = "none"
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate and normalize image generation parameters for FLUX Krea model.
|
||||
|
||||
Args:
|
||||
image_size: Either a preset string or custom size dict
|
||||
num_inference_steps: Number of inference steps
|
||||
guidance_scale: Guidance scale value
|
||||
num_images: Number of images to generate
|
||||
output_format: Output format for images
|
||||
acceleration: Acceleration mode for generation speed
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Validated and normalized parameters
|
||||
|
||||
Raises:
|
||||
ValueError: If any parameter is invalid
|
||||
"""
|
||||
validated = {}
|
||||
|
||||
# Validate image_size
|
||||
if isinstance(image_size, str):
|
||||
if image_size not in VALID_IMAGE_SIZES:
|
||||
raise ValueError(f"Invalid image_size '{image_size}'. Must be one of: {VALID_IMAGE_SIZES}")
|
||||
validated["image_size"] = image_size
|
||||
elif isinstance(image_size, dict):
|
||||
if "width" not in image_size or "height" not in image_size:
|
||||
raise ValueError("Custom image_size must contain 'width' and 'height' keys")
|
||||
if not isinstance(image_size["width"], int) or not isinstance(image_size["height"], int):
|
||||
raise ValueError("Custom image_size width and height must be integers")
|
||||
if image_size["width"] < 64 or image_size["height"] < 64:
|
||||
raise ValueError("Custom image_size dimensions must be at least 64x64")
|
||||
if image_size["width"] > 2048 or image_size["height"] > 2048:
|
||||
raise ValueError("Custom image_size dimensions must not exceed 2048x2048")
|
||||
validated["image_size"] = image_size
|
||||
else:
|
||||
raise ValueError("image_size must be either a preset string or a dict with width/height")
|
||||
|
||||
# Validate num_inference_steps
|
||||
if not isinstance(num_inference_steps, int) or num_inference_steps < 1 or num_inference_steps > 100:
|
||||
raise ValueError("num_inference_steps must be an integer between 1 and 100")
|
||||
validated["num_inference_steps"] = num_inference_steps
|
||||
|
||||
# Validate guidance_scale (FLUX Krea default is 4.5)
|
||||
if not isinstance(guidance_scale, (int, float)) or guidance_scale < 0.1 or guidance_scale > 20.0:
|
||||
raise ValueError("guidance_scale must be a number between 0.1 and 20.0")
|
||||
validated["guidance_scale"] = float(guidance_scale)
|
||||
|
||||
# Validate num_images
|
||||
if not isinstance(num_images, int) or num_images < 1 or num_images > 4:
|
||||
raise ValueError("num_images must be an integer between 1 and 4")
|
||||
validated["num_images"] = num_images
|
||||
|
||||
# Validate output_format
|
||||
if output_format not in VALID_OUTPUT_FORMATS:
|
||||
raise ValueError(f"Invalid output_format '{output_format}'. Must be one of: {VALID_OUTPUT_FORMATS}")
|
||||
validated["output_format"] = output_format
|
||||
|
||||
# Validate acceleration
|
||||
if acceleration not in VALID_ACCELERATION_MODES:
|
||||
raise ValueError(f"Invalid acceleration '{acceleration}'. Must be one of: {VALID_ACCELERATION_MODES}")
|
||||
validated["acceleration"] = acceleration
|
||||
|
||||
return validated
|
||||
|
||||
|
||||
async def _upscale_image(image_url: str, original_prompt: str) -> Optional[Dict[str, Any]]:
    """
    Upscale an image using FAL.ai's Clarity Upscaler.

    Failures never propagate: any exception or malformed response is
    reported on stdout and signalled by returning None, so the caller can
    fall back to the original image.

    Args:
        image_url (str): URL of the image to upscale
        original_prompt (str): Original prompt used to generate the image;
            appended to UPSCALER_DEFAULT_PROMPT for the upscaler request

    Returns:
        Optional[Dict[str, Any]]: Dict with "url", "width", "height",
            "upscaled" (True) and "upscale_factor", or None if upscaling fails
    """
    try:
        print("🔍 Upscaling image with Clarity Upscaler...")

        # Prepare arguments for upscaler
        upscaler_arguments = {
            "image_url": image_url,
            "prompt": f"{UPSCALER_DEFAULT_PROMPT}, {original_prompt}",
            "upscale_factor": UPSCALER_FACTOR,
            "negative_prompt": UPSCALER_NEGATIVE_PROMPT,
            "creativity": UPSCALER_CREATIVITY,
            "resemblance": UPSCALER_RESEMBLANCE,
            "guidance_scale": UPSCALER_GUIDANCE_SCALE,
            "num_inference_steps": UPSCALER_NUM_INFERENCE_STEPS,
            "enable_safety_checker": UPSCALER_SAFETY_CHECKER
        }

        # Submit upscaler request and wait for the result
        handler = await fal_client.submit_async(
            UPSCALER_MODEL,
            arguments=upscaler_arguments
        )
        result = await handler.get()

        upscaled_image = result["image"] if result and "image" in result else None

        # Without a URL the response is unusable: treat it as invalid here
        # rather than relying on a KeyError reaching the generic handler.
        if not isinstance(upscaled_image, dict) or "url" not in upscaled_image:
            print("❌ Upscaler returned invalid response")
            return None

        print(f"✅ Image upscaled successfully to {upscaled_image.get('width', 'unknown')}x{upscaled_image.get('height', 'unknown')}")
        return {
            "url": upscaled_image["url"],
            "width": upscaled_image.get("width", 0),
            "height": upscaled_image.get("height", 0),
            "upscaled": True,
            "upscale_factor": UPSCALER_FACTOR
        }

    except Exception as e:
        # Deliberate best-effort: upscaling is an enhancement, not a requirement.
        print(f"❌ Error upscaling image: {str(e)}")
        return None
|
||||
|
||||
|
||||
async def image_generate_tool(
    prompt: str,
    image_size: Union[str, Dict[str, int]] = DEFAULT_IMAGE_SIZE,
    num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS,
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
    num_images: int = DEFAULT_NUM_IMAGES,
    enable_safety_checker: bool = True,
    output_format: str = DEFAULT_OUTPUT_FORMAT,
    acceleration: str = "none",
    allow_nsfw_images: bool = True,
    seed: Optional[int] = None
) -> str:
    """
    Generate images from text prompts using FAL.ai's FLUX.1 Krea model with automatic upscaling.

    Every generated image is passed through the Clarity Upscaler; if
    upscaling an image fails, the original image is used instead. Only the
    URL of the first resulting image is returned. The function never raises:
    all errors are reported on stdout and returned as a failure result.

    Args:
        prompt (str): The text prompt describing the desired image (non-empty, at most 1000 characters)
        image_size (Union[str, Dict[str, int]]): Preset size or custom {"width": int, "height": int}
        num_inference_steps (int): Number of denoising steps (1-100, default: 50)
        guidance_scale (float): How closely to follow prompt (0.1-20.0, default: 4.5)
        num_images (int): Number of images to generate (1-4, default: 1)
        enable_safety_checker (bool): Enable content safety filtering (default: True)
        output_format (str): Image format "jpeg" or "png" (default: "png")
        acceleration (str): Generation speed "none", "regular", or "high" (default: "none")
        allow_nsfw_images (bool): Allow generation of NSFW content (default: True)
        seed (Optional[int]): Random seed for reproducible results; non-integer values are ignored

    Returns:
        str: JSON string containing minimal generation results:
            {
                "success": bool,
                "image": str or None  # URL of the first (upscaled, or original on fallback) image, or None if failed
            }
    """
    debug_call_data = {
        "parameters": {
            "prompt": prompt,
            "image_size": image_size,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
            "num_images": num_images,
            "enable_safety_checker": enable_safety_checker,
            "output_format": output_format,
            "acceleration": acceleration,
            "allow_nsfw_images": allow_nsfw_images,
            "seed": seed
        },
        "error": None,
        "success": False,
        "images_generated": 0,
        "generation_time": 0
    }

    start_time = datetime.datetime.now()

    try:
        # Validate the prompt BEFORE slicing it for the log line below, so a
        # missing or non-string prompt reports the validation message instead
        # of an unrelated TypeError.
        if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0:
            raise ValueError("Prompt is required and must be a non-empty string")

        if len(prompt) > 1000:
            raise ValueError("Prompt must be 1000 characters or less")

        print(f"🎨 Generating {num_images} image(s) with FLUX Krea: {prompt[:80]}{'...' if len(prompt) > 80 else ''}")

        # Check API key availability
        if not os.getenv("FAL_KEY"):
            raise ValueError("FAL_KEY environment variable not set")

        # Validate parameters
        validated_params = _validate_parameters(
            image_size, num_inference_steps, guidance_scale, num_images, output_format, acceleration
        )

        # Prepare arguments for FAL.ai FLUX Krea API
        arguments = {
            "prompt": prompt.strip(),
            "image_size": validated_params["image_size"],
            "num_inference_steps": validated_params["num_inference_steps"],
            "guidance_scale": validated_params["guidance_scale"],
            "num_images": validated_params["num_images"],
            "enable_safety_checker": enable_safety_checker,
            "output_format": validated_params["output_format"],
            "acceleration": validated_params["acceleration"],
            "allow_nsfw_images": allow_nsfw_images,
            "sync_mode": True  # Use sync mode for immediate results
        }

        # Add seed if provided (None and non-int values are skipped)
        if isinstance(seed, int):
            arguments["seed"] = seed

        print(f"🚀 Submitting generation request to FAL.ai FLUX Krea...")
        print(f" Model: {DEFAULT_MODEL}")
        print(f" Size: {validated_params['image_size']}")
        print(f" Steps: {validated_params['num_inference_steps']}")
        print(f" Guidance: {validated_params['guidance_scale']}")
        print(f" Acceleration: {validated_params['acceleration']}")

        # Submit request to FAL.ai and wait for the result
        handler = await fal_client.submit_async(
            DEFAULT_MODEL,
            arguments=arguments
        )
        result = await handler.get()

        # Measured before upscaling: covers validation + generation only.
        generation_time = (datetime.datetime.now() - start_time).total_seconds()

        # Process the response
        if not result or "images" not in result:
            raise ValueError("Invalid response from FAL.ai API - no images returned")

        images = result.get("images", [])
        if not images:
            raise ValueError("No images were generated")

        # Format image data and upscale images; entries without a URL are skipped
        formatted_images = []
        for img in images:
            if not (isinstance(img, dict) and "url" in img):
                continue

            # Attempt to upscale the image
            upscaled_image = await _upscale_image(img["url"], prompt.strip())

            if upscaled_image:
                # Use upscaled image if successful
                formatted_images.append(upscaled_image)
            else:
                # Fall back to original image if upscaling fails
                print(f"⚠️ Using original image as fallback")
                formatted_images.append({
                    "url": img["url"],
                    "width": img.get("width", 0),
                    "height": img.get("height", 0),
                    "upscaled": False
                })

        if not formatted_images:
            raise ValueError("No valid image URLs returned from API")

        upscaled_count = sum(1 for img in formatted_images if img.get("upscaled", False))
        print(f"✅ Generated {len(formatted_images)} image(s) in {generation_time:.1f}s ({upscaled_count} upscaled)")

        # Prepare successful response - minimal format (first image only)
        response_data = {
            "success": True,
            "image": formatted_images[0]["url"]
        }

        debug_call_data["success"] = True
        debug_call_data["images_generated"] = len(formatted_images)
        debug_call_data["generation_time"] = generation_time

        # Log debug information
        _log_debug_call("image_generate_tool", debug_call_data)
        _save_debug_log()

        return json.dumps(response_data, indent=2)

    except Exception as e:
        # Top-level boundary: the tool contract is to return JSON, not raise.
        generation_time = (datetime.datetime.now() - start_time).total_seconds()
        error_msg = f"Error generating image: {str(e)}"
        print(f"❌ {error_msg}")

        # Prepare error response - minimal format
        response_data = {
            "success": False,
            "image": None
        }

        debug_call_data["error"] = error_msg
        debug_call_data["generation_time"] = generation_time
        _log_debug_call("image_generate_tool", debug_call_data)
        _save_debug_log()

        return json.dumps(response_data, indent=2)
|
||||
|
||||
|
||||
def check_fal_api_key() -> bool:
    """
    Report whether the FAL_KEY environment variable holds a non-empty value.

    Returns:
        bool: True if FAL_KEY is set and non-empty, False otherwise
    """
    fal_key = os.environ.get("FAL_KEY")
    return bool(fal_key)
|
||||
|
||||
|
||||
def check_image_generation_requirements() -> bool:
    """
    Check that both the FAL.ai API key and the fal_client library are available.

    Returns:
        bool: True if the API key is set and fal_client can be imported,
            False otherwise
    """
    # The key check comes first, so the import is only attempted when a key exists.
    if not check_fal_api_key():
        return False

    try:
        import fal_client
    except ImportError:
        return False

    return True
|
||||
|
||||
|
||||
def get_debug_session_info() -> Dict[str, Any]:
    """
    Summarize the current debug session.

    Returns:
        Dict[str, Any]: "enabled", "session_id", "log_path" and
            "total_calls"; when debug mode is off the id and path are None
            and the call count is 0
    """
    if DEBUG_MODE and DEBUG_DATA:
        log_file = DEBUG_LOG_PATH / f"image_tools_debug_{DEBUG_SESSION_ID}.json"
        return {
            "enabled": True,
            "session_id": DEBUG_SESSION_ID,
            "log_path": str(log_file),
            "total_calls": len(DEBUG_DATA["tool_calls"])
        }

    return {
        "enabled": False,
        "session_id": None,
        "log_path": None,
        "total_calls": 0
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Simple test/demo when run directly: checks prerequisites and prints
    # usage help. It does not generate any image.
    print("🎨 Image Generation Tools Module - FLUX.1 Krea + Auto Upscaling")
    print("=" * 60)

    # Check if API key is available
    api_available = check_fal_api_key()

    if not api_available:
        print("❌ FAL_KEY environment variable not set")
        print("Please set your API key: export FAL_KEY='your-key-here'")
        print("Get API key at: https://fal.ai/")
        # SystemExit instead of the site-provided exit() helper, which is
        # meant for interactive use and is missing under `python -S`.
        raise SystemExit(1)
    else:
        print("✅ FAL.ai API key found")

    # Check if fal_client is available
    try:
        import fal_client
        print("✅ fal_client library available")
    except ImportError:
        print("❌ fal_client library not found")
        print("Please install: pip install fal-client")
        raise SystemExit(1)

    print("🛠️ Image generation tools ready for use!")
    print(f"🤖 Using model: {DEFAULT_MODEL}")
    print(f"🔍 Auto-upscaling with: {UPSCALER_MODEL} ({UPSCALER_FACTOR}x)")

    # Show debug mode status
    if DEBUG_MODE:
        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
        print(f" Debug logs will be saved to: ./logs/image_tools_debug_{DEBUG_SESSION_ID}.json")
    else:
        print("🐛 Debug mode disabled (set IMAGE_TOOLS_DEBUG=true to enable)")

    print("\nBasic usage:")
    print(" from image_generation_tool import image_generate_tool")
    print(" import asyncio")
    print("")
    print(" async def main():")
    print(" # Generate image with automatic 2x upscaling")
    print(" result = await image_generate_tool(")
    print(" prompt='A serene mountain landscape with cherry blossoms',")
    print(" image_size='landscape_4_3',")
    print(" num_images=1")
    print(" )")
    print(" print(result)")
    print(" asyncio.run(main())")

    print("\nSupported image sizes:")
    for size in VALID_IMAGE_SIZES:
        print(f" - {size}")
    print(" - Custom: {'width': 512, 'height': 768} (if needed)")

    print("\nAcceleration modes:")
    for mode in VALID_ACCELERATION_MODES:
        print(f" - {mode}")

    print("\nExample prompts:")
    print(" - 'A candid street photo of a woman with a pink bob and bold eyeliner'")
    print(" - 'Modern architecture building with glass facade, sunset lighting'")
    print(" - 'Abstract art with vibrant colors and geometric patterns'")
    print(" - 'Portrait of a wise old owl perched on ancient tree branch'")
    print(" - 'Futuristic cityscape with flying cars and neon lights'")

    print("\nDebug mode:")
    print(" # Enable debug logging")
    print(" export IMAGE_TOOLS_DEBUG=true")
    print(" # Debug logs capture all image generation calls and results")
    print(" # Logs saved to: ./logs/image_tools_debug_UUID.json")
|
||||
586
tools/mixture_of_agents_tool.py
Normal file
586
tools/mixture_of_agents_tool.py
Normal file
|
|
@ -0,0 +1,586 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Mixture-of-Agents Tool Module
|
||||
|
||||
This module implements the Mixture-of-Agents (MoA) methodology that leverages
|
||||
the collective strengths of multiple LLMs through a layered architecture to
|
||||
achieve state-of-the-art performance on complex reasoning tasks.
|
||||
|
||||
Based on the research paper: "Mixture-of-Agents Enhances Large Language Model Capabilities"
|
||||
by Junlin Wang et al. (arXiv:2406.04692v1)
|
||||
|
||||
Key Features:
|
||||
- Multi-layer LLM collaboration for enhanced reasoning
|
||||
- Parallel processing of reference models for efficiency
|
||||
- Intelligent aggregation and synthesis of diverse responses
|
||||
- Specialized for extremely difficult problems requiring intense reasoning
|
||||
- Optimized for coding, mathematics, and complex analytical tasks
|
||||
|
||||
Available Tool:
|
||||
- mixture_of_agents_tool: Process complex queries using multiple frontier models
|
||||
|
||||
Architecture:
|
||||
1. Reference models generate diverse initial responses in parallel
|
||||
2. Aggregator model synthesizes responses into a high-quality output
|
||||
3. Multiple layers can be used for iterative refinement (future enhancement)
|
||||
|
||||
Models Used:
|
||||
- Reference Models: claude-opus-4-20250514, gemini-2.5-pro, gpt-5, deepseek-r1
|
||||
- Aggregator Model: claude-opus-4-20250514 (highest capability for synthesis)
|
||||
|
||||
Configuration:
|
||||
To customize the MoA setup, modify the configuration constants at the top of this file:
|
||||
- REFERENCE_MODELS: List of models for generating diverse initial responses
|
||||
- AGGREGATOR_MODEL: Model used to synthesize the final response
|
||||
- REFERENCE_TEMPERATURE/AGGREGATOR_TEMPERATURE: Sampling temperatures
|
||||
- MIN_SUCCESSFUL_REFERENCES: Minimum successful models needed to proceed
|
||||
|
||||
Usage:
|
||||
from mixture_of_agents_tool import mixture_of_agents_tool
|
||||
import asyncio
|
||||
|
||||
# Process a complex query
|
||||
result = await mixture_of_agents_tool(
|
||||
user_prompt="Solve this complex mathematical proof..."
|
||||
)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import asyncio
|
||||
import uuid
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
# Shared async client for every MoA request, pointed at the Nous Research
# inference endpoint. The API key is read from NOUS_API_KEY once, at import.
# NOTE(review): if NOUS_API_KEY is unset, api_key is None here - confirm the
# client constructor tolerates that at import time.
nous_client = AsyncOpenAI(
    api_key=os.environ.get("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1",
)
|
||||
|
||||
# --- MoA configuration ---

# Reference models: each one answers the user prompt independently.
REFERENCE_MODELS = [
    "claude-opus-4-20250514",
    "gemini-2.5-pro",
    "gpt-5",
    "deepseek-r1",
]

# Aggregator model: synthesizes the reference answers into the final output.
AGGREGATOR_MODEL = "claude-opus-4-20250514"

# Sampling temperatures for the two stages.
REFERENCE_TEMPERATURE = 0.6   # reference stage: more varied answers
AGGREGATOR_TEMPERATURE = 0.4  # aggregation stage: more focused synthesis

# Minimum number of successful reference answers needed to proceed.
MIN_SUCCESSFUL_REFERENCES = 1

# System prompt for the aggregator model (from the research paper); the
# enumerated reference responses are appended after it.
AGGREGATOR_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability.

Responses from models:"""
|
||||
|
||||
# Debug logging is opt-in: it is active only when MOA_TOOLS_DEBUG equals
# "true" (case-insensitive). A fresh session id is generated on every import.
DEBUG_MODE = os.getenv("MOA_TOOLS_DEBUG", "false").lower() == "true"
DEBUG_SESSION_ID = str(uuid.uuid4())
DEBUG_LOG_PATH = Path("./logs")

if DEBUG_MODE:
    # In-memory record of this session; tool calls are appended as they happen.
    DEBUG_DATA = {
        "session_id": DEBUG_SESSION_ID,
        "start_time": datetime.datetime.now().isoformat(),
        "debug_enabled": DEBUG_MODE,
        "tool_calls": []
    }
    # Make sure the log directory exists before anything tries to write to it.
    DEBUG_LOG_PATH.mkdir(exist_ok=True)
    print(f"🐛 MoA debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
else:
    DEBUG_DATA = None
|
||||
|
||||
|
||||
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
    """
    Append one tool-call record to the in-memory debug session.

    Does nothing unless debug mode is enabled and the session data exists.

    Args:
        tool_name (str): Name of the tool that was invoked
        call_data (Dict[str, Any]): Extra fields merged into the record
            (keys in call_data override the generated ones)
    """
    if DEBUG_MODE and DEBUG_DATA:
        record = {
            "timestamp": datetime.datetime.now().isoformat(),
            "tool_name": tool_name,
        }
        record.update(call_data)
        DEBUG_DATA["tool_calls"].append(record)
|
||||
|
||||
|
||||
def _save_debug_log() -> None:
    """
    Write the debug session data as JSON into the logs directory.

    Does nothing unless debug mode is enabled. Any failure while writing is
    reported on stdout and otherwise ignored (logging is best-effort).
    """
    if not (DEBUG_MODE and DEBUG_DATA):
        return

    try:
        target = DEBUG_LOG_PATH / f"moa_tools_debug_{DEBUG_SESSION_ID}.json"

        # Stamp the session with its current end time and call count.
        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])

        with target.open('w', encoding='utf-8') as log_file:
            json.dump(DEBUG_DATA, log_file, indent=2, ensure_ascii=False)

        print(f"🐛 MoA debug log saved: {target}")

    except Exception as exc:
        print(f"❌ Error saving MoA debug log: {exc}")
|
||||
|
||||
|
||||
def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> str:
|
||||
"""
|
||||
Construct the final system prompt for the aggregator including all model responses.
|
||||
|
||||
Args:
|
||||
system_prompt (str): Base system prompt for aggregation
|
||||
responses (List[str]): List of responses from reference models
|
||||
|
||||
Returns:
|
||||
str: Complete system prompt with enumerated responses
|
||||
"""
|
||||
response_text = "\n".join([f"{i+1}. {response}" for i, response in enumerate(responses)])
|
||||
return f"{system_prompt}\n\n{response_text}"
|
||||
|
||||
|
||||
async def _run_reference_model_safe(
    model: str,
    user_prompt: str,
    temperature: float = REFERENCE_TEMPERATURE,
    max_tokens: int = 32000,
    max_retries: int = 3
) -> tuple[str, str, bool]:
    """
    Run a single reference model with retry logic and graceful failure handling.

    This function never raises: every failure is reported on stdout and
    returned as a (model, error_message, False) tuple.

    Args:
        model (str): Model identifier to use
        user_prompt (str): The user's query
        temperature (float): Sampling temperature; only sent for models whose
            name does not start with "gpt-"
        max_tokens (int): Accepted for interface compatibility; currently not
            forwarded to the API call
        max_retries (int): Maximum number of attempts; waits 2**attempt
            seconds between attempts

    Returns:
        tuple[str, str, bool]: (model_name, response_content_or_error, success_flag)
    """
    error_str = "no attempts were made"

    for attempt in range(max_retries):
        try:
            print(f"🤖 Querying {model} (attempt {attempt + 1}/{max_retries})")

            # Build parameters for the API call
            api_params = {
                "model": model,
                "messages": [{"role": "user", "content": user_prompt}]
            }

            # Temperature is omitted for "gpt-" models, which are treated
            # here as not accepting custom temperature values.
            if not model.lower().startswith('gpt-'):
                api_params["temperature"] = temperature

            response = await nous_client.chat.completions.create(**api_params)

            raw_content = response.choices[0].message.content
            if raw_content is None:
                # Raise a readable error (retried like any other failure)
                # instead of an opaque AttributeError from None.strip().
                raise ValueError(f"{model} returned a response with no text content")

            content = raw_content.strip()
            print(f"✅ {model} responded ({len(content)} characters)")
            return model, content, True

        except Exception as e:
            error_str = str(e)
            # Log more detailed error information for debugging
            if "invalid" in error_str.lower():
                print(f"⚠️ {model} invalid request error (attempt {attempt + 1}): {error_str}")
            elif "rate" in error_str.lower() or "limit" in error_str.lower():
                print(f"⚠️ {model} rate limit error (attempt {attempt + 1}): {error_str}")
            else:
                print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")

            if attempt < max_retries - 1:
                # Exponential backoff before the next attempt
                sleep_time = 2 ** attempt
                print(f" Retrying in {sleep_time}s...")
                await asyncio.sleep(sleep_time)

    # Reached when every attempt failed, or when max_retries < 1 and the loop
    # never ran; always return the documented tuple rather than None.
    error_msg = f"{model} failed after {max_retries} attempts: {error_str}"
    print(f"❌ {error_msg}")
    return model, error_msg, False
|
||||
|
||||
|
||||
async def _run_aggregator_model(
    system_prompt: str,
    user_prompt: str,
    temperature: float = AGGREGATOR_TEMPERATURE,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None
) -> str:
    """
    Run the aggregator model to synthesize the final response.

    Args:
        system_prompt (str): System prompt with all reference responses
        user_prompt (str): Original user query
        temperature (float): Sampling temperature for the aggregation call
        max_tokens (Optional[int]): Maximum tokens in the final response.
            Only forwarded to the API when explicitly provided.
        model (Optional[str]): Aggregator model to use. Falls back to
            AGGREGATOR_MODEL when omitted.

    Returns:
        str: Synthesized final response (whitespace-stripped)

    Raises:
        ValueError: If the API response carries no message content
    """
    active_model = model or AGGREGATOR_MODEL
    print(f"🧠 Running aggregator model: {active_model}")

    # Build parameters for the API call
    api_params = {
        "model": active_model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    }

    # GPT models (especially gpt-4o-mini) don't support custom temperature values
    # Only include temperature for non-GPT models
    if not active_model.lower().startswith('gpt-'):
        api_params["temperature"] = temperature

    # Respect the caller's token budget instead of silently dropping it
    if max_tokens is not None:
        api_params["max_tokens"] = max_tokens

    response = await nous_client.chat.completions.create(**api_params)

    # message.content may be None; fail with a clear message rather than an
    # AttributeError on .strip()
    raw_content = response.choices[0].message.content
    if raw_content is None:
        raise ValueError(f"Aggregator model {active_model} returned no content")

    content = raw_content.strip()
    print(f"✅ Aggregation complete ({len(content)} characters)")
    return content


async def mixture_of_agents_tool(
    user_prompt: str,
    reference_models: Optional[List[str]] = None,
    aggregator_model: Optional[str] = None
) -> str:
    """
    Process a complex query using the Mixture-of-Agents methodology.

    This tool leverages multiple frontier language models to collaboratively solve
    extremely difficult problems requiring intense reasoning. It's particularly
    effective for:
    - Complex mathematical proofs and calculations
    - Advanced coding problems and algorithm design
    - Multi-step analytical reasoning tasks
    - Problems requiring diverse domain expertise
    - Tasks where single models show limitations

    The MoA approach uses a fixed 2-layer architecture:
    1. Layer 1: Multiple reference models generate diverse responses in parallel
       (sampled at REFERENCE_TEMPERATURE)
    2. Layer 2: Aggregator model synthesizes the best elements into the final
       response (sampled at AGGREGATOR_TEMPERATURE)

    Args:
        user_prompt (str): The complex query or problem to solve
        reference_models (Optional[List[str]]): Custom reference models to use
            (defaults to REFERENCE_MODELS)
        aggregator_model (Optional[str]): Custom aggregator model to use
            (defaults to AGGREGATOR_MODEL)

    Returns:
        str: JSON string containing the MoA results with the following structure:
            {
                "success": bool,
                "response": str,
                "models_used": {
                    "reference_models": List[str],
                    "aggregator_model": str
                },
                "error": str  (only present when success is False)
            }

    Note:
        This function does not raise. Any failure (missing API key, too few
        successful reference models, aggregator errors) is caught and reported
        through the returned JSON with "success": false.
    """
    start_time = datetime.datetime.now()

    debug_call_data = {
        "parameters": {
            "user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt,
            "reference_models": reference_models or REFERENCE_MODELS,
            "aggregator_model": aggregator_model or AGGREGATOR_MODEL,
            "reference_temperature": REFERENCE_TEMPERATURE,
            "aggregator_temperature": AGGREGATOR_TEMPERATURE,
            "min_successful_references": MIN_SUCCESSFUL_REFERENCES
        },
        "error": None,
        "success": False,
        "reference_responses_count": 0,
        "failed_models_count": 0,
        "failed_models": [],
        "final_response_length": 0,
        "processing_time_seconds": 0,
        "models_used": {}
    }

    try:
        print(f"🚀 Starting Mixture-of-Agents processing...")
        print(f"📝 Query: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")

        # Validate API key availability
        if not os.getenv("NOUS_API_KEY"):
            raise ValueError("NOUS_API_KEY environment variable not set")

        # Use provided models or defaults
        ref_models = reference_models or REFERENCE_MODELS
        agg_model = aggregator_model or AGGREGATOR_MODEL

        print(f"🔄 Using {len(ref_models)} reference models in 2-layer MoA architecture")

        # Layer 1: Generate diverse responses from reference models (with failure handling)
        print("📡 Layer 1: Generating reference responses...")
        model_results = await asyncio.gather(*[
            _run_reference_model_safe(model, user_prompt, REFERENCE_TEMPERATURE)
            for model in ref_models
        ])

        # Separate successful and failed responses
        successful_responses = []
        failed_models = []

        for model_name, content, success in model_results:
            if success:
                successful_responses.append(content)
            else:
                failed_models.append(model_name)

        successful_count = len(successful_responses)
        failed_count = len(failed_models)

        print(f"📊 Reference model results: {successful_count} successful, {failed_count} failed")

        if failed_models:
            print(f"⚠️ Failed models: {', '.join(failed_models)}")

        # Record the Layer 1 outcome before the threshold check so the debug
        # log still shows which models failed when we abort below.
        debug_call_data["reference_responses_count"] = successful_count
        debug_call_data["failed_models_count"] = failed_count
        debug_call_data["failed_models"] = failed_models

        # Check if we have enough successful responses to proceed
        if successful_count < MIN_SUCCESSFUL_REFERENCES:
            raise ValueError(f"Insufficient successful reference models ({successful_count}/{len(ref_models)}). Need at least {MIN_SUCCESSFUL_REFERENCES} successful responses.")

        # Layer 2: Aggregate responses using the aggregator model
        print("🧠 Layer 2: Synthesizing final response...")
        aggregator_system_prompt = _construct_aggregator_prompt(
            AGGREGATOR_SYSTEM_PROMPT,
            successful_responses
        )

        # Pass the resolved aggregator so a caller-supplied model is actually
        # used (and matches what is reported in "models_used").
        final_response = await _run_aggregator_model(
            aggregator_system_prompt,
            user_prompt,
            AGGREGATOR_TEMPERATURE,
            model=agg_model
        )

        # Calculate processing time
        end_time = datetime.datetime.now()
        processing_time = (end_time - start_time).total_seconds()

        print(f"✅ MoA processing completed in {processing_time:.2f} seconds")

        # Prepare successful response (only final aggregated result, minimal fields)
        result = {
            "success": True,
            "response": final_response,
            "models_used": {
                "reference_models": ref_models,
                "aggregator_model": agg_model
            }
        }

        debug_call_data["success"] = True
        debug_call_data["final_response_length"] = len(final_response)
        debug_call_data["processing_time_seconds"] = processing_time
        debug_call_data["models_used"] = result["models_used"]

        # Log debug information
        _log_debug_call("mixture_of_agents_tool", debug_call_data)
        _save_debug_log()

        return json.dumps(result, indent=2)

    except Exception as e:
        # Top-level boundary: convert every failure into an error payload
        error_msg = f"Error in MoA processing: {str(e)}"
        print(f"❌ {error_msg}")

        # Calculate processing time even for errors
        end_time = datetime.datetime.now()
        processing_time = (end_time - start_time).total_seconds()

        # Prepare error response (minimal fields)
        result = {
            "success": False,
            "response": "MoA processing failed. Please try again or use a single model for this query.",
            "models_used": {
                "reference_models": reference_models or REFERENCE_MODELS,
                "aggregator_model": aggregator_model or AGGREGATOR_MODEL
            },
            "error": error_msg
        }

        debug_call_data["error"] = error_msg
        debug_call_data["processing_time_seconds"] = processing_time
        _log_debug_call("mixture_of_agents_tool", debug_call_data)
        _save_debug_log()

        return json.dumps(result, indent=2)
|
||||
|
||||
|
||||
def check_nous_api_key() -> bool:
    """
    Report whether the Nous Research API key is present in the environment.

    Returns:
        bool: True when NOUS_API_KEY is set to a non-empty value, False otherwise
    """
    api_key = os.getenv("NOUS_API_KEY")
    if api_key:
        return True
    return False
|
||||
|
||||
|
||||
def check_moa_requirements() -> bool:
    """
    Verify that everything the MoA tool needs is available.

    The only requirement checked is the Nous Research API key.

    Returns:
        bool: True if requirements are met, False otherwise
    """
    api_key_present = check_nous_api_key()
    return api_key_present
|
||||
|
||||
|
||||
def get_debug_session_info() -> Dict[str, Any]:
    """
    Describe the current debug session.

    Returns:
        Dict[str, Any]: "enabled", "session_id", "log_path" and "total_calls".
        When debugging is off (or no debug data exists) the session fields are
        None and the call count is 0.
    """
    debug_active = bool(DEBUG_MODE and DEBUG_DATA)

    session_info = {
        "enabled": debug_active,
        "session_id": None,
        "log_path": None,
        "total_calls": 0
    }

    if debug_active:
        log_file = DEBUG_LOG_PATH / f"moa_tools_debug_{DEBUG_SESSION_ID}.json"
        session_info["session_id"] = DEBUG_SESSION_ID
        session_info["log_path"] = str(log_file)
        session_info["total_calls"] = len(DEBUG_DATA["tool_calls"])

    return session_info
|
||||
|
||||
|
||||
def get_available_models() -> Dict[str, List[str]]:
    """
    List the models configured for MoA processing.

    Returns:
        Dict[str, List[str]]: The configured reference models, the aggregator
        model (as a one-element list), and the concatenation of both under
        "supported_models".
    """
    reference = REFERENCE_MODELS
    aggregators = [AGGREGATOR_MODEL]
    return dict(
        reference_models=reference,
        aggregator_models=aggregators,
        supported_models=reference + aggregators,
    )
|
||||
|
||||
|
||||
def get_moa_configuration() -> Dict[str, Any]:
    """
    Snapshot the module-level MoA configuration.

    Returns:
        Dict[str, Any]: Models, temperatures, the minimum number of successful
        reference responses, the reference model count, and a human-readable
        "failure_tolerance" summary derived from those two numbers.
    """
    total_models = len(REFERENCE_MODELS)
    tolerated_failures = total_models - MIN_SUCCESSFUL_REFERENCES

    configuration = {
        "reference_models": REFERENCE_MODELS,
        "aggregator_model": AGGREGATOR_MODEL,
        "reference_temperature": REFERENCE_TEMPERATURE,
        "aggregator_temperature": AGGREGATOR_TEMPERATURE,
        "min_successful_references": MIN_SUCCESSFUL_REFERENCES,
        "total_reference_models": total_models,
    }
    configuration["failure_tolerance"] = f"{tolerated_failures}/{total_models} models can fail"
    return configuration
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Simple test/demo when run directly
    print("🤖 Mixture-of-Agents Tool Module")
    print("=" * 50)

    # Check if API key is available; without it there is nothing to demo
    api_available = check_nous_api_key()

    if api_available:
        print("✅ Nous Research API key found")
    else:
        for line in (
            "❌ NOUS_API_KEY environment variable not set",
            "Please set your API key: export NOUS_API_KEY='your-key-here'",
            "Get API key at: https://inference-api.nousresearch.com/",
        ):
            print(line)
        exit(1)

    print("🛠️ MoA tools ready for use!")

    # Show current configuration
    config = get_moa_configuration()
    for line in (
        f"\n⚙️ Current Configuration:",
        f" 🤖 Reference models ({len(config['reference_models'])}): {', '.join(config['reference_models'])}",
        f" 🧠 Aggregator model: {config['aggregator_model']}",
        f" 🌡️ Reference temperature: {config['reference_temperature']}",
        f" 🌡️ Aggregator temperature: {config['aggregator_temperature']}",
        f" 🛡️ Failure tolerance: {config['failure_tolerance']}",
        f" 📊 Minimum successful models: {config['min_successful_references']}",
    ):
        print(line)

    # Show debug mode status
    if not DEBUG_MODE:
        print("\n🐛 Debug mode disabled (set MOA_TOOLS_DEBUG=true to enable)")
    else:
        print(f"\n🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
        print(f" Debug logs will be saved to: ./logs/moa_tools_debug_{DEBUG_SESSION_ID}.json")

    # Static help text, printed one line at a time
    for line in (
        "\nBasic usage:",
        " from mixture_of_agents_tool import mixture_of_agents_tool",
        " import asyncio",
        "",
        " async def main():",
        " result = await mixture_of_agents_tool(",
        " user_prompt='Solve this complex mathematical proof...'",
        " )",
        " print(result)",
        " asyncio.run(main())",
        "\nBest use cases:",
        " - Complex mathematical proofs and calculations",
        " - Advanced coding problems and algorithm design",
        " - Multi-step analytical reasoning tasks",
        " - Problems requiring diverse domain expertise",
        " - Tasks where single models show limitations",
        "\nPerformance characteristics:",
        " - Higher latency due to multiple model calls",
        " - Significantly improved quality for complex tasks",
        " - Parallel processing for efficiency",
        f" - Optimized temperatures: {REFERENCE_TEMPERATURE} for reference models, {AGGREGATOR_TEMPERATURE} for aggregation",
        " - Token-efficient: only returns final aggregated response",
        " - Resilient: continues with partial model failures",
        f" - Configurable: easy to modify models and settings at top of file",
        " - State-of-the-art results on challenging benchmarks",
        "\nDebug mode:",
        " # Enable debug logging",
        " export MOA_TOOLS_DEBUG=true",
        " # Debug logs capture all MoA processing steps and metrics",
        " # Logs saved to: ./logs/moa_tools_debug_UUID.json",
    ):
        print(line)
|
||||
234
tools/terminal_tool.py
Normal file
234
tools/terminal_tool.py
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Terminal Tool Module
|
||||
|
||||
This module provides a single terminal tool using Hecate's VM infrastructure.
|
||||
It wraps Hecate's functionality to provide a simple interface for executing commands
|
||||
on Morph VMs with automatic lifecycle management.
|
||||
|
||||
Available tool:
|
||||
- terminal_tool: Execute commands with optional interactive session support
|
||||
|
||||
Usage:
|
||||
from terminal_tool import terminal_tool
|
||||
|
||||
# Execute a single command
|
||||
result = terminal_tool("ls -la")
|
||||
|
||||
# Execute in an interactive session
|
||||
result = terminal_tool("python", input_keys="print('hello')\\nexit()\\n")
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Optional, Dict, Any
|
||||
from hecate import run_tool_with_lifecycle_management
|
||||
from morphcloud._llm import ToolCall
|
||||
|
||||
# Detailed description for the terminal tool based on Hermes Terminal system prompt.
# This text is a runtime string exported from the module (see tools/__init__.py);
# edit its wording with care, since consumers receive it verbatim.
TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure, persistent Linux VM environment with full interactive application support.

**Environment:**
- Minimal Debian-based OS with internet access
- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
- **Full state persistence across tool calls**: current directory (pwd), environment variables, activated virtual environments (conda/venv), running processes, and command history all persist between consecutive tool calls
- Session state managed automatically via tmux

**Command Execution:**
- Simple commands: Just provide the 'command' parameter
- Background processes: Set 'background': True for servers/long-running tasks
- Interactive applications automatically detected and handled

**Interactive Applications (TUIs/Pagers/Prompts):**
When commands enter interactive mode (vim, nano, less, git prompts, package managers, etc.), you'll receive screen content with "frozen" status. This is NORMAL - the session is still active and waiting for input.

**To interact with frozen sessions:**
1. Use 'input_keys' parameter with keystrokes to send
2. System auto-detects and uses the active session
3. Session stays active until application exits

**Special Key Syntax for input_keys:**
- `<ESC>`: Escape key
- `<ENTER>`: Enter/Return
- `<CTRL+C>`, `<CTRL+D>`, `<CTRL+Z>`: Control combinations
- `<UP>`, `<DOWN>`, `<LEFT>`, `<RIGHT>`: Arrow keys
- `<TAB>`, `<BACKSPACE>`: Tab and Backspace
- `<F1>` through `<F12>`: Function keys
- `<SHIFT+TAB>`: Shift+Tab
- Uppercase letters for Shift+letter (e.g., 'V' for Shift+V)
- Symbols for Shift+number (e.g., '!' for Shift+1, ':' for Shift+;)

**Examples:**
- Start vim: `{"command": "vim file.txt"}`
- Type in vim: `{"input_keys": "iHello World<ESC>"}`
- Save and quit: `{"input_keys": ":wq<ENTER>"}`
- Navigate in less: `{"input_keys": "j"}`
- Quit less: `{"input_keys": "q"}`

**Best Practices:**
- Run servers/long processes in background with separate tool calls
- Chain multiple foreground commands in single call if needed
- Monitor disk usage for large tasks, clean up to free space
- Test components incrementally with mock inputs
- Install whatever tools needed - full system access provided"""
|
||||
|
||||
def terminal_tool(
    command: Optional[str] = None,
    input_keys: Optional[str] = None,
    session_id: Optional[str] = None,
    background: bool = False,
    idle_threshold: float = 5.0,
    timeout: Optional[int] = None
) -> str:
    """
    Run a command (or send keystrokes) through Hecate's "run_command" tool.

    The request is wrapped in a ToolCall and dispatched via
    run_tool_with_lifecycle_management; the returned mapping is normalised
    into a fixed JSON shape.

    Args:
        command: The command to execute (optional if continuing existing session)
        input_keys: Keystrokes to send to interactive session (e.g., "hello\\n")
        session_id: ID of existing session to continue (optional)
        background: Whether to run the command in the background (default: False)
        idle_threshold: Seconds to wait for output before considering session idle
            (default: 5.0; only forwarded when it differs from the default)
        timeout: Command timeout in seconds (optional)

    Returns:
        str: JSON string with the keys "output", "screen", "session_id",
        "exit_code", "error" and "status". "status" is "active" when the
        result carries a session id, "ended" when it does not, and "error"
        when any exception was raised while executing.

    Examples:
        # Execute a simple command
        >>> result = terminal_tool(command="ls -la /tmp")

        # Start an interactive Python session
        >>> result = terminal_tool(command="python3")
        >>> session_data = json.loads(result)
        >>> session_id = session_data["session_id"]

        # Send input to the session
        >>> result = terminal_tool(input_keys="print('Hello')\\n", session_id=session_id)

        # Run a background task
        >>> result = terminal_tool(command="sleep 60", background=True)
    """
    try:
        # (key, value, should_forward): only set / non-default values are sent
        candidate_fields = (
            ("command", command, bool(command)),
            ("input_keys", input_keys, bool(input_keys)),
            ("session_id", session_id, bool(session_id)),
            ("background", background, bool(background)),
            ("idle_threshold", idle_threshold, idle_threshold != 5.0),
            ("timeout", timeout, timeout is not None),
        )
        payload = {key: value for key, value, forward in candidate_fields if forward}

        # Execute with lifecycle management
        raw = run_tool_with_lifecycle_management(
            ToolCall(name="run_command", input=payload)
        )

        # Normalise the result: hecate's "stdout"/"returncode" take precedence,
        # with "output"/"exit_code" accepted as fallbacks.
        active_session = raw.get("session_id")
        normalised = {
            "output": raw.get("stdout", raw.get("output", "")),
            "screen": raw.get("screen", ""),
            "session_id": active_session,
            "exit_code": raw.get("returncode", raw.get("exit_code", -1)),
            "error": raw.get("error"),
            "status": "active" if active_session else "ended"
        }
        return json.dumps(normalised)

    except Exception as e:
        failure = {
            "output": "",
            "screen": "",
            "session_id": None,
            "exit_code": -1,
            "error": f"Failed to execute terminal command: {str(e)}",
            "status": "error"
        }
        return json.dumps(failure)
|
||||
|
||||
def check_hecate_requirements() -> bool:
    """
    Check whether the terminal tool can run in this environment.

    MORPH_API_KEY must be set; OPENAI_API_KEY is optional and only produces a
    warning when missing. Finally the ``hecate`` package must be importable.
    Diagnostics are printed for anything that is missing.

    Returns:
        bool: True if all requirements are met, False otherwise
    """
    def _unset(names):
        # Names whose environment variable is missing or empty
        return [name for name in names if not os.getenv(name)]

    absent_required = _unset(["MORPH_API_KEY"])
    absent_optional = _unset(["OPENAI_API_KEY"])  # Needed for Hecate's LLM features

    if absent_required:
        print(f"Missing required environment variables: {', '.join(absent_required)}")
        return False

    if absent_optional:
        print(f"Warning: Missing optional environment variables: {', '.join(absent_optional)}")
        print(" (Some Hecate features may be limited)")

    # Check if Hecate is importable
    try:
        import hecate
    except ImportError:
        print("Hecate is not installed. Please install it with: pip install hecate")
        return False
    else:
        return True
|
||||
|
||||
# Module-level initialization check (runs on import and prints diagnostics)
_requirements_met = check_hecate_requirements()

if __name__ == "__main__":
    # Simple test/demo when run directly
    print("Terminal Tool Module")
    print("=" * 40)

    if not _requirements_met:
        print("Requirements not met. Please check the messages above.")
        exit(1)

    # Static help text, printed one line at a time
    for line in (
        "All requirements met!",
        "\nAvailable Tool:",
        " - terminal_tool: Execute commands with optional interactive session support",
        "\nUsage Examples:",
        " # Execute a command",
        " result = terminal_tool(command='ls -la')",
        " ",
        " # Start an interactive session",
        " result = terminal_tool(command='python3')",
        " session_data = json.loads(result)",
        " session_id = session_data['session_id']",
        " ",
        " # Send input to the session",
        " result = terminal_tool(",
        " input_keys='print(\"Hello\")\\\\n',",
        " session_id=session_id",
        " )",
        " ",
        " # Run a background task",
        " result = terminal_tool(command='sleep 60', background=True)",
    ):
        print(line)

    # Report which environment variables are configured (values of keys are not shown)
    print("\nEnvironment Variables:")
    for line in (
        f" MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}",
        f" OPENAI_API_KEY: {'Set' if os.getenv('OPENAI_API_KEY') else 'Not set (optional)'}",
        f" HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300)",
        f" HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_p5294qxt')} (default: snapshot_p5294qxt)",
    ):
        print(line)
|
||||
346
tools/vision_tools.py
Normal file
346
tools/vision_tools.py
Normal file
|
|
@ -0,0 +1,346 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Vision Tools Module
|
||||
|
||||
This module provides vision analysis tools that work with image URLs.
|
||||
Uses Gemini Flash via Nous Research API for intelligent image understanding.
|
||||
|
||||
Available tools:
|
||||
- vision_analyze_tool: Analyze images from URLs with custom prompts
|
||||
|
||||
Features:
|
||||
- Comprehensive image description
|
||||
- Context-aware analysis based on user queries
|
||||
- Proper error handling and validation
|
||||
- Debug logging support
|
||||
|
||||
Usage:
|
||||
from vision_tools import vision_analyze_tool
|
||||
import asyncio
|
||||
|
||||
# Analyze an image
|
||||
result = await vision_analyze_tool(
|
||||
image_url="https://example.com/image.jpg",
|
||||
user_prompt="What architectural style is this building?"
|
||||
)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import asyncio
|
||||
import uuid
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
# Initialize Nous Research API client for vision processing.
# The client is created at import time with whatever NOUS_API_KEY holds then.
# NOTE(review): the OpenAI client may reject a missing api_key at construction,
# which would make importing this module fail without the key — confirm.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Configuration for vision processing: default model for vision_analyze_tool
DEFAULT_VISION_MODEL = "gemini-2.5-flash"

# Debug mode configuration.
# Debugging is on only when VISION_TOOLS_DEBUG is "true" (case-insensitive).
DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
# A fresh session id is generated on every import, even when debugging is off
DEBUG_SESSION_ID = str(uuid.uuid4())
# Relative path: logs land in ./logs under the current working directory
DEBUG_LOG_PATH = Path("./logs")
# Accumulates one entry per tool call; None when debugging is disabled
DEBUG_DATA = {
    "session_id": DEBUG_SESSION_ID,
    "start_time": datetime.datetime.now().isoformat(),
    "debug_enabled": DEBUG_MODE,
    "tool_calls": []
} if DEBUG_MODE else None

# Create logs directory if debug mode is enabled
if DEBUG_MODE:
    DEBUG_LOG_PATH.mkdir(exist_ok=True)
    print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
|
||||
|
||||
|
||||
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
    """
    Append one tool-call record to the in-memory debug data.

    Does nothing when debug mode is off or no debug data structure exists.

    Args:
        tool_name (str): Name of the tool being called
        call_data (Dict[str, Any]): Data about the call including parameters and
            results; its keys are merged into the record and override the
            generated "timestamp"/"tool_name" on collision
    """
    if DEBUG_MODE and DEBUG_DATA:
        record = {
            "timestamp": datetime.datetime.now().isoformat(),
            "tool_name": tool_name,
        }
        record.update(call_data)
        DEBUG_DATA["tool_calls"].append(record)
|
||||
|
||||
|
||||
def _save_debug_log() -> None:
    """
    Write the accumulated debug data to its JSON file in the logs directory.

    Does nothing when debug mode is off. Failures while writing are reported
    on stdout and otherwise ignored, so logging never breaks a tool call.
    """
    if not (DEBUG_MODE and DEBUG_DATA):
        return

    try:
        target = DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"

        # Refresh the session summary before each write
        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])

        with target.open('w', encoding='utf-8') as log_file:
            json.dump(DEBUG_DATA, log_file, indent=2, ensure_ascii=False)

        print(f"🐛 Vision debug log saved: {target}")

    except Exception as e:
        print(f"❌ Error saving vision debug log: {str(e)}")
|
||||
|
||||
|
||||
def _validate_image_url(url: str) -> bool:
|
||||
"""
|
||||
Basic validation of image URL format.
|
||||
|
||||
Args:
|
||||
url (str): The URL to validate
|
||||
|
||||
Returns:
|
||||
bool: True if URL appears to be valid, False otherwise
|
||||
"""
|
||||
if not url or not isinstance(url, str):
|
||||
return False
|
||||
|
||||
# Check if it's a valid URL format
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
return False
|
||||
|
||||
# Check for common image extensions (optional, as URLs may not have extensions)
|
||||
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
|
||||
|
||||
return True # Allow all HTTP/HTTPS URLs for flexibility
|
||||
|
||||
|
||||
async def vision_analyze_tool(
    image_url: str,
    user_prompt: str,
    model: str = DEFAULT_VISION_MODEL
) -> str:
    """
    Analyze an image from a URL using vision AI.

    The image URL and prompt are sent as a single user message to the Nous
    Research chat-completions endpoint. The user_prompt parameter is expected
    to be pre-formatted by the calling function (typically model_tools.py) to
    include both full description requests and specific questions; it is
    forwarded unchanged.

    Args:
        image_url (str): The URL of the image to analyze (http:// or https://)
        user_prompt (str): The pre-formatted prompt for the vision model
        model (str): The vision model to use (default: DEFAULT_VISION_MODEL)

    Returns:
        str: JSON string containing the analysis results with the following structure:
            {
                "success": bool,
                "analysis": str (a generic error message when the analysis
                                 is empty or the request failed)
            }

    Note:
        This function does not raise. Any failure (invalid URL, missing
        NOUS_API_KEY, API errors, empty responses) is caught, recorded in the
        debug log, and reported as "success": false with a generic message.
    """
    debug_call_data = {
        "parameters": {
            "image_url": image_url,
            "user_prompt": user_prompt,
            "model": model
        },
        "error": None,
        "success": False,
        "analysis_length": 0,
        "model_used": model
    }

    try:
        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")

        # Validate image URL
        if not _validate_image_url(image_url):
            raise ValueError("Invalid image URL format. Must start with http:// or https://")

        # Check API key availability
        if not os.getenv("NOUS_API_KEY"):
            raise ValueError("NOUS_API_KEY environment variable not set")

        # Prepare the message with image URL format. The prompt is used as
        # provided (model_tools.py handles full description formatting).
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": user_prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_url
                        }
                    }
                ]
            }
        ]

        print(f"🧠 Processing image with {model}...")

        # Call the vision API
        response = await nous_client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.1,  # Low temperature for consistent analysis
            max_tokens=2000  # Generous limit for detailed analysis
        )

        # Extract the analysis. message.content may be None; report that
        # explicitly instead of failing with an AttributeError on .strip().
        raw_content = response.choices[0].message.content
        if raw_content is None:
            raise ValueError(f"Vision model {model} returned no content")

        analysis = raw_content.strip()
        analysis_length = len(analysis)

        print(f"✅ Image analysis completed ({analysis_length} characters)")

        # Prepare successful response (an empty analysis falls back to the
        # generic message)
        result = {
            "success": True,
            "analysis": analysis or "There was a problem with the request and the image could not be analyzed."
        }

        debug_call_data["success"] = True
        debug_call_data["analysis_length"] = analysis_length

        # Log debug information
        _log_debug_call("vision_analyze_tool", debug_call_data)
        _save_debug_log()

        return json.dumps(result, indent=2)

    except Exception as e:
        # Top-level boundary: details go to stdout and the debug log, while the
        # caller receives only a generic message.
        error_msg = f"Error analyzing image: {str(e)}"
        print(f"❌ {error_msg}")

        # Prepare error response
        result = {
            "success": False,
            "analysis": "There was a problem with the request and the image could not be analyzed."
        }

        debug_call_data["error"] = error_msg
        _log_debug_call("vision_analyze_tool", debug_call_data)
        _save_debug_log()

        return json.dumps(result, indent=2)
|
||||
|
||||
|
||||
def check_nous_api_key() -> bool:
    """
    Report whether the Nous Research API key is present in the environment.

    Returns:
        bool: True when NOUS_API_KEY is set to a non-empty value, False otherwise
    """
    api_key = os.getenv("NOUS_API_KEY")
    if api_key:
        return True
    return False
|
||||
|
||||
|
||||
def check_vision_requirements() -> bool:
    """
    Report whether the vision tools have everything they need to run.

    The only requirement checked here is the outcome of
    check_nous_api_key().

    Returns:
        bool: True when check_nous_api_key() reports success, False otherwise
    """
    api_key_available = check_nous_api_key()
    return api_key_available
|
||||
|
||||
|
||||
def get_debug_session_info() -> Dict[str, Any]:
    """
    Summarize the state of the current debug session.

    Returns:
        Dict[str, Any]: A dictionary with the keys "enabled", "session_id",
        "log_path" and "total_calls". When DEBUG_MODE or DEBUG_DATA is
        falsy, the session is reported as disabled with empty placeholder
        values; otherwise the live session values are returned.
    """
    if DEBUG_MODE and DEBUG_DATA:
        # Path of the JSON log file associated with this session id.
        session_log = DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
        recorded_calls = DEBUG_DATA["tool_calls"]
        return {
            "enabled": True,
            "session_id": DEBUG_SESSION_ID,
            "log_path": str(session_log),
            "total_calls": len(recorded_calls),
        }

    # Debugging is off (or nothing has been initialised): report placeholders.
    return {
        "enabled": False,
        "session_id": None,
        "log_path": None,
        "total_calls": 0,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Simple test/demo when run directly: checks that the API key is
    # configured, reports the debug-mode status, and prints usage hints.
    print("👁️ Vision Tools Module")
    print("=" * 40)

    # Check if API key is available
    api_available = check_nous_api_key()

    if not api_available:
        print("❌ NOUS_API_KEY environment variable not set")
        print("Please set your API key: export NOUS_API_KEY='your-key-here'")
        print("Get API key at: https://inference-api.nousresearch.com/")
        # Raise SystemExit directly rather than calling exit(): the exit()
        # helper is installed by the site module for interactive use and is
        # not guaranteed to exist (e.g. under `python -S` or frozen builds).
        raise SystemExit(1)

    print("✅ Nous Research API key found")

    print("🛠️ Vision tools ready for use!")
    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")

    # Show debug mode status
    if DEBUG_MODE:
        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
        print(f" Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
    else:
        print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")

    # Minimal usage example for callers of the async tool.
    print("\nBasic usage:")
    print(" from vision_tools import vision_analyze_tool")
    print(" import asyncio")
    print("")
    print(" async def main():")
    print(" result = await vision_analyze_tool(")
    print(" image_url='https://example.com/image.jpg',")
    print(" user_prompt='What do you see in this image?'")
    print(" )")
    print(" print(result)")
    print(" asyncio.run(main())")

    # Sample prompts to illustrate what the tool can be asked.
    print("\nExample prompts:")
    print(" - 'What architectural style is this building?'")
    print(" - 'Describe the emotions and mood in this image'")
    print(" - 'What text can you read in this image?'")
    print(" - 'Identify any safety hazards visible'")
    print(" - 'What products or brands are shown?'")

    # How to switch on debug logging.
    print("\nDebug mode:")
    print(" # Enable debug logging")
    print(" export VISION_TOOLS_DEBUG=true")
    print(" # Debug logs capture all vision analysis calls and results")
    print(" # Logs saved to: ./logs/vision_tools_debug_UUID.json")
|
||||
1009
tools/web_tools.py
Normal file
1009
tools/web_tools.py
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue