From b66c093316b9b0105b7781e665be25efeec0238f Mon Sep 17 00:00:00 2001
From: teknium
Date: Wed, 14 Jan 2026 13:41:09 +0000
Subject: [PATCH] add default datagen example script

---
Review notes: the log file name is now derived from the run name (it was
still "imagen_eval_gpt5"), and the script now exits with the pipeline's
status instead of always reporting success. The post-image blob id on the
index line below was not recomputed after these edits.

 run_datagen_images.sh | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)
 mode change 100644 => 100755 run_datagen_images.sh

diff --git a/run_datagen_images.sh b/run_datagen_images.sh
old mode 100644
new mode 100755
index 79e448ec..216984d7
--- a/run_datagen_images.sh
+++ b/run_datagen_images.sh
@@ -1,12 +1,39 @@
+#!/bin/bash
+# Example datagen run: runs batch_runner.py on the dataset file named below
+# and mirrors its stdout and stderr into a timestamped log file under logs/.
+
+# Make the tee pipeline below report batch_runner.py's failure, not just tee's.
+set -o pipefail
+
+# Shared by --run_name and the log file name so the two cannot drift apart.
+RUN_NAME="imagen_train_sft_glm4.7"
+
+# Create logs directory if it doesn't exist
+mkdir -p logs
+
+# Generate a timestamp for the log file
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+LOG_FILE="logs/${RUN_NAME}_${TIMESTAMP}.log"
+
+echo "📝 Logging output to: $LOG_FILE"
+
 python batch_runner.py \
-  --dataset_file="hermes-agent-imagen-data/hermes_agent_imagen_eval.jsonl" \
+  --dataset_file="source-data/hermes-agent-imagen-data/hermes_agent_imagen_train_sft.jsonl" \
   --batch_size=10 \
-  --run_name="imagen_eval_gpt5" \
+  --run_name="${RUN_NAME}" \
   --distribution="image_gen" \
-  --model="gpt-5" \
-  --base_url="https://api.openai.com/v1" \
-  --api_key="${OPENAI_API_KEY}" \
-  --num_workers=4 \
-  --max_turns=5 \
+  --model="z-ai/glm-4.7" \
+  --base_url="https://openrouter.ai/api/v1" \
+  --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
+  --num_workers=25 \
+  --max_turns=25 \
   --verbose \
-  --ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt."
\ No newline at end of file
+  --ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt." \
+  2>&1 | tee "$LOG_FILE"
+status=$?
+if (( status != 0 )); then
+  echo "❌ Run failed with exit status ${status}; see log: $LOG_FILE" >&2
+  exit "$status"
+fi
+
+echo "✅ Log saved to: $LOG_FILE"