Enhance logging and tool initialization for improved performance

- Updated logging configuration in `run_agent.py` to suppress debug messages from additional third-party libraries, reducing noise in logs.
- Enhanced shell scripts for terminal tasks to utilize Singularity for containerized execution, including pre-build SIF image logic and improved logging.
- Refactored tool initialization in `mixture_of_agents_tool.py`, `vision_tools.py`, and `web_tools.py` to implement lazy loading of API clients, optimizing resource usage and error handling.
- Updated ephemeral system prompts in shell scripts to provide clearer guidance on task execution and resource usage.
This commit is contained in:
teknium 2026-01-29 19:59:59 +00:00
parent 5438b64e32
commit 4c05ef0ba8
6 changed files with 118 additions and 37 deletions

View file

@ -28,7 +28,7 @@ python batch_runner.py \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=25 \
--max_turns=60 \
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully." \
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"

View file

@ -13,21 +13,55 @@ LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
echo "💻 Running terminal-focused tasks with terminal_tasks distribution"
# Set terminal environment (Modal sandboxes recommended for safety)
export TERMINAL_ENV=modal
export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# Set terminal environment (using Singularity for containerized execution)
export TERMINAL_ENV=singularity
export TERMINAL_TIMEOUT=300
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
CACHE_BASE="/scratch/$USER/.apptainer"
else
CACHE_BASE="/tmp/$USER/.apptainer"
fi
export APPTAINER_CACHEDIR="$CACHE_BASE"
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
# Pre-build SIF image if it doesn't exist (avoids 40 workers all downloading simultaneously)
SIF_IMAGE="$CACHE_BASE/python-nodejs-3.11-20.sif"
DOCKER_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
if [ ! -f "$SIF_IMAGE" ]; then
echo "🔨 Building Singularity image (one-time setup)..."
echo " Source: $DOCKER_IMAGE"
echo " Target: $SIF_IMAGE"
apptainer build "$SIF_IMAGE" "$DOCKER_IMAGE"
if [ $? -ne 0 ]; then
echo "❌ Failed to build SIF image. Falling back to docker:// URL"
export TERMINAL_SINGULARITY_IMAGE="$DOCKER_IMAGE"
else
echo "✅ SIF image built successfully"
export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
fi
else
echo "✅ Using pre-built SIF image: $SIF_IMAGE"
export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
fi
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
python batch_runner.py \
--dataset_file="nous-terminal-tasks.jsonl" \
--batch_size=20 \
--run_name="terminal_tasks" \
--batch_size=5 \
--run_name="terminal_tasks-kimi-k2.5" \
--distribution="terminal_tasks" \
--model="z-ai/glm-4.7" \
--model="moonshotai/kimi-k2.5" \
--verbose \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=40 \
--num_workers=80 \
--max_turns=60 \
--providers_ignored="Novita" \
--resume \
--ephemeral_system_prompt="You have access to a terminal tool for executing commands and completing coding, system administration, and computing tasks. Use the terminal to write code, run scripts, install packages (use --break-system-packages with pip if needed), manipulate files, and verify your work. Always test and validate code you create. Do not use interactive tools like vim, nano, or python REPL. If git output is large, pipe to cat. When web search is available, use it to look up documentation, APIs, or best practices. If browser tools are available, use them for web interactions that require page manipulation. Do not use the terminal to communicate with the user - only your final response will be shown to them." \
2>&1 | tee "$LOG_FILE"