Update environment configuration and enhance terminal tool integration
- Modified `.env.example` to set the default terminal environment to 'singularity' and updated Docker and Singularity image references for better compatibility. - Enhanced `run_mixed_tasks.sh` and `run_terminal_tasks.sh` scripts to utilize the new Singularity setup, including improved logging and cache directory management. - Introduced functionality in `terminal_tool.py` to automatically build and cache SIF images from Docker URLs, streamlining the execution environment setup. - Updated logging messages for clarity on image usage and cache directory paths.
This commit is contained in:
parent
7ea17bb957
commit
771cf41fea
4 changed files with 173 additions and 62 deletions
|
|
@ -13,19 +13,31 @@ LOG_FILE="logs/mixed_tasks_$(date +%Y%m%d_%H%M%S).log"
|
|||
echo "📝 Logging output to: $LOG_FILE"
|
||||
echo "🔀 Running mixed browser+terminal tasks with mixed_tasks distribution"
|
||||
|
||||
# Set terminal environment (Modal sandboxes recommended for safety)
|
||||
export TERMINAL_ENV=modal
|
||||
export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
# Set terminal environment
|
||||
# SIF images are automatically built/cached by terminal_tool.py
|
||||
export TERMINAL_ENV=singularity
|
||||
export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
export TERMINAL_TIMEOUT=300
|
||||
|
||||
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
|
||||
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
|
||||
CACHE_BASE="/scratch/$USER/.apptainer"
|
||||
else
|
||||
CACHE_BASE="/tmp/$USER/.apptainer"
|
||||
fi
|
||||
export APPTAINER_CACHEDIR="$CACHE_BASE"
|
||||
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
|
||||
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
|
||||
|
||||
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="mixed-browser-terminal-tasks.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="mixed_tasks" \
|
||||
--distribution="mixed_tasks" \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--model="moonshotai/kimi-k2.5" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=25 \
|
||||
--max_turns=60 \
|
||||
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
# Terminal-focused data generation run
|
||||
# Uses nous-terminal-tasks.jsonl (597 tasks)
|
||||
# Distribution: terminal 97%, web 15%, browser 10%, vision 8%, image_gen 3%
|
||||
# Distribution: terminal 97%, web 15%, browser 0%, vision 8%, image_gen 3%
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
|
@ -13,8 +13,10 @@ LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log"
|
|||
echo "📝 Logging output to: $LOG_FILE"
|
||||
echo "💻 Running terminal-focused tasks with terminal_tasks distribution"
|
||||
|
||||
# Set terminal environment (using Singularity for containerized execution)
|
||||
# Set terminal environment
|
||||
# SIF images are automatically built/cached by terminal_tool.py
|
||||
export TERMINAL_ENV=singularity
|
||||
export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
export TERMINAL_TIMEOUT=300
|
||||
|
||||
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
|
||||
|
|
@ -27,28 +29,8 @@ export APPTAINER_CACHEDIR="$CACHE_BASE"
|
|||
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
|
||||
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
|
||||
|
||||
# Pre-build SIF image if it doesn't exist (avoids 40 workers all downloading simultaneously)
|
||||
SIF_IMAGE="$CACHE_BASE/python-nodejs-3.11-20.sif"
|
||||
DOCKER_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
|
||||
if [ ! -f "$SIF_IMAGE" ]; then
|
||||
echo "🔨 Building Singularity image (one-time setup)..."
|
||||
echo " Source: $DOCKER_IMAGE"
|
||||
echo " Target: $SIF_IMAGE"
|
||||
apptainer build "$SIF_IMAGE" "$DOCKER_IMAGE"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "❌ Failed to build SIF image. Falling back to docker:// URL"
|
||||
export TERMINAL_SINGULARITY_IMAGE="$DOCKER_IMAGE"
|
||||
else
|
||||
echo "✅ SIF image built successfully"
|
||||
export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
|
||||
fi
|
||||
else
|
||||
echo "✅ Using pre-built SIF image: $SIF_IMAGE"
|
||||
export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE"
|
||||
fi
|
||||
|
||||
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
|
||||
echo "🐳 Image: $TERMINAL_SINGULARITY_IMAGE (auto-converted to SIF on first use)"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="nous-terminal-tasks.jsonl" \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue