Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

This commit is contained in:
teknium 2025-10-08 02:33:58 +00:00
parent a398d320b7
commit d36790de91
3 changed files with 75 additions and 8 deletions

View file

@@ -11,6 +11,7 @@ An AI agent with advanced tool-calling capabilities, featuring a flexible toolse
- **Creative Tools**: Generate images from text prompts
- **Toolsets System**: Organize tools into logical groups for different scenarios
- **Batch Processing**: Process datasets in parallel with checkpointing and statistics tracking
- **Ephemeral System Prompts**: Guide model behavior without polluting training datasets
## Setup
@@ -164,8 +165,30 @@ python batch_runner.py \
**Quick Start:** See [QUICKSTART_BATCH.md](QUICKSTART_BATCH.md) for a 5-minute getting started guide.
**Full Documentation:** See [BATCH_PROCESSING.md](BATCH_PROCESSING.md) for comprehensive documentation.
### Ephemeral System Prompts
The ephemeral system prompt feature allows you to guide the model's behavior during batch processing **without** saving that prompt to the training dataset trajectories. This is useful for:
- Guiding model behavior during data collection
- Adding task-specific instructions
- Keeping saved trajectories clean and focused on tool-calling format
**Example:**
```bash
python batch_runner.py \
--dataset_file=prompts.jsonl \
--batch_size=10 \
--run_name=my_run \
--ephemeral_system_prompt="You are a helpful assistant focused on image generation."
```
The ephemeral prompt will influence the model's behavior during execution, but **only the standard tool-calling system prompt** will be saved in the trajectory files.
**Documentation:** See [docs/ephemeral_system_prompt.md](docs/ephemeral_system_prompt.md) for complete details.
## Command Line Arguments
**Single Agent (`run_agent.py`):**
- `--query`: The question or task for the agent
- `--model`: Model to use (default: claude-opus-4-20250514)
- `--api_key`: API key for authentication
@@ -176,6 +199,16 @@ python batch_runner.py \
- `--list_tools`: List all available toolsets and tools
- `--save_trajectories`: Save conversation trajectories to JSONL files
**Batch Processing (`batch_runner.py`):**
- `--dataset_file`: Path to JSONL file with prompts
- `--batch_size`: Number of prompts per batch
- `--run_name`: Name for this run (for output/checkpointing)
- `--distribution`: Toolset distribution to use (default: "default")
- `--num_workers`: Number of parallel workers (default: 4)
- `--resume`: Resume from checkpoint if interrupted
- `--ephemeral_system_prompt`: System prompt used during execution but NOT saved to trajectories
- `--list_distributions`: List available toolset distributions
## Environment Variables
All environment variables can be configured in the `.env` file (copy from `.env.example`).

View file

@@ -141,7 +141,8 @@ def _process_single_prompt(
max_iterations=config["max_iterations"], max_iterations=config["max_iterations"],
enabled_toolsets=selected_toolsets, enabled_toolsets=selected_toolsets,
save_trajectories=False, # We handle saving ourselves save_trajectories=False, # We handle saving ourselves
verbose_logging=config.get("verbose", False) verbose_logging=config.get("verbose", False),
ephemeral_system_prompt=config.get("ephemeral_system_prompt")
) )
# Run the agent # Run the agent
@@ -299,7 +300,8 @@ class BatchRunner:
api_key: str = None, api_key: str = None,
model: str = "claude-opus-4-20250514", model: str = "claude-opus-4-20250514",
num_workers: int = 4, num_workers: int = 4,
verbose: bool = False verbose: bool = False,
ephemeral_system_prompt: str = None
): ):
""" """
Initialize the batch runner. Initialize the batch runner.
@@ -315,6 +317,7 @@
model (str): Model name to use model (str): Model name to use
num_workers (int): Number of parallel workers num_workers (int): Number of parallel workers
verbose (bool): Enable verbose logging verbose (bool): Enable verbose logging
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
""" """
self.dataset_file = Path(dataset_file) self.dataset_file = Path(dataset_file)
self.batch_size = batch_size self.batch_size = batch_size
@@ -326,6 +329,7 @@
self.model = model self.model = model
self.num_workers = num_workers self.num_workers = num_workers
self.verbose = verbose self.verbose = verbose
self.ephemeral_system_prompt = ephemeral_system_prompt
# Validate distribution # Validate distribution
if not validate_distribution(distribution): if not validate_distribution(distribution):
@@ -355,6 +359,9 @@
print(f" Distribution: {self.distribution}") print(f" Distribution: {self.distribution}")
print(f" Output directory: {self.output_dir}") print(f" Output directory: {self.output_dir}")
print(f" Workers: {self.num_workers}") print(f" Workers: {self.num_workers}")
if self.ephemeral_system_prompt:
prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
print(f" 🔒 Ephemeral system prompt: '{prompt_preview}'")
def _load_dataset(self) -> List[Dict[str, Any]]: def _load_dataset(self) -> List[Dict[str, Any]]:
""" """
@@ -477,7 +484,8 @@
"max_iterations": self.max_iterations, "max_iterations": self.max_iterations,
"base_url": self.base_url, "base_url": self.base_url,
"api_key": self.api_key, "api_key": self.api_key,
"verbose": self.verbose "verbose": self.verbose,
"ephemeral_system_prompt": self.ephemeral_system_prompt
} }
# Get completed prompts set # Get completed prompts set
@@ -619,7 +627,8 @@ def main(
num_workers: int = 4, num_workers: int = 4,
resume: bool = False, resume: bool = False,
verbose: bool = False, verbose: bool = False,
list_distributions: bool = False list_distributions: bool = False,
ephemeral_system_prompt: str = None
): ):
""" """
Run batch processing of agent prompts from a dataset. Run batch processing of agent prompts from a dataset.
@@ -637,6 +646,7 @@
resume (bool): Resume from checkpoint if run was interrupted (default: False) resume (bool): Resume from checkpoint if run was interrupted (default: False)
verbose (bool): Enable verbose logging (default: False) verbose (bool): Enable verbose logging (default: False)
list_distributions (bool): List available toolset distributions and exit list_distributions (bool): List available toolset distributions and exit
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
Examples: Examples:
# Basic usage # Basic usage
@@ -648,6 +658,10 @@
# Use specific distribution # Use specific distribution
python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=image_test --distribution=image_gen python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=image_test --distribution=image_gen
# With ephemeral system prompt (not saved to dataset)
python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run \\
--ephemeral_system_prompt="You are a helpful assistant focused on image generation."
# List available distributions # List available distributions
python batch_runner.py --list_distributions python batch_runner.py --list_distributions
""" """
@@ -692,7 +706,8 @@ def main(
api_key=api_key, api_key=api_key,
model=model, model=model,
num_workers=num_workers, num_workers=num_workers,
verbose=verbose verbose=verbose,
ephemeral_system_prompt=ephemeral_system_prompt
) )
runner.run(resume=resume) runner.run(resume=resume)

View file

@@ -63,7 +63,8 @@ class AIAgent:
enabled_toolsets: List[str] = None, enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None, disabled_toolsets: List[str] = None,
save_trajectories: bool = False, save_trajectories: bool = False,
verbose_logging: bool = False verbose_logging: bool = False,
ephemeral_system_prompt: str = None
): ):
""" """
Initialize the AI Agent. Initialize the AI Agent.
@@ -78,12 +79,14 @@
disabled_toolsets (List[str]): Disable tools from these toolsets (optional) disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False) save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
verbose_logging (bool): Enable verbose logging for debugging (default: False) verbose_logging (bool): Enable verbose logging for debugging (default: False)
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
""" """
self.model = model self.model = model
self.max_iterations = max_iterations self.max_iterations = max_iterations
self.tool_delay = tool_delay self.tool_delay = tool_delay
self.save_trajectories = save_trajectories self.save_trajectories = save_trajectories
self.verbose_logging = verbose_logging self.verbose_logging = verbose_logging
self.ephemeral_system_prompt = ephemeral_system_prompt
# Store toolset filtering options # Store toolset filtering options
self.enabled_toolsets = enabled_toolsets self.enabled_toolsets = enabled_toolsets
@@ -157,6 +160,11 @@
# Show trajectory saving status # Show trajectory saving status
if self.save_trajectories: if self.save_trajectories:
print("📝 Trajectory saving enabled") print("📝 Trajectory saving enabled")
# Show ephemeral system prompt status
if self.ephemeral_system_prompt:
prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")
def _format_tools_for_system_message(self) -> str: def _format_tools_for_system_message(self) -> str:
""" """
@@ -343,7 +351,7 @@
Args: Args:
user_message (str): The user's message/question user_message (str): The user's message/question
system_message (str): Custom system message (optional) system_message (str): Custom system message (optional, overrides ephemeral_system_prompt if provided)
conversation_history (List[Dict]): Previous conversation messages (optional) conversation_history (List[Dict]): Previous conversation messages (optional)
Returns: Returns:
@@ -360,6 +368,10 @@
print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'") print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
# Determine which system prompt to use for API calls (ephemeral)
# Priority: explicit system_message > ephemeral_system_prompt > None
active_system_prompt = system_message if system_message is not None else self.ephemeral_system_prompt
# Main conversation loop # Main conversation loop
api_call_count = 0 api_call_count = 0
final_response = None final_response = None
@@ -379,10 +391,17 @@
while retry_count <= max_retries: while retry_count <= max_retries:
try: try:
# Prepare messages for API call
# If we have an ephemeral system prompt, prepend it to the messages
api_messages = messages.copy()
if active_system_prompt:
# Insert system message at the beginning
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
# Make API call with tools # Make API call with tools
response = self.client.chat.completions.create( response = self.client.chat.completions.create(
model=self.model, model=self.model,
messages=messages, messages=api_messages,
tools=self.tools if self.tools else None, tools=self.tools if self.tools else None,
timeout=60.0 # Add explicit timeout timeout=60.0 # Add explicit timeout
) )