Update RL tools and enhance configuration management

- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`.
- Enhanced `README.md` with installation instructions for submodules and updated API key usage.
- Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools.
- Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation.
- Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.
This commit is contained in:
teknium1 2026-02-04 13:57:59 -08:00
parent 12bbca95ec
commit 3c0d0dba49
7 changed files with 274 additions and 56 deletions

View file

@ -15,7 +15,7 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins
``` ```
The installer will: The installer will:
- Clone to `~/.hermes-agent` - Clone to `~/.hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
- Create a virtual environment - Create a virtual environment
- Install all dependencies - Install all dependencies
- Run the interactive setup wizard - Run the interactive setup wizard
@ -281,18 +281,10 @@ Train language models with reinforcement learning using the Tinker API and Atrop
```bash ```bash
TINKER_API_KEY=your-tinker-key # Get from https://tinker-console.thinkingmachines.ai/keys TINKER_API_KEY=your-tinker-key # Get from https://tinker-console.thinkingmachines.ai/keys
WANDB_API_KEY=your-wandb-key # Get from https://wandb.ai/authorize WANDB_API_KEY=your-wandb-key # Get from https://wandb.ai/authorize
OPENROUTER_API_KEY=your-key # Optional: for rl_test_inference
``` ```
2. **Install tinker-atropos:** (in a separate directory) 2. **That's it!** tinker-atropos is included as a submodule - no separate installation needed.
```bash
cd ~/tinker-atropos
pip install -e .
```
3. **Start the RL API server:**
```bash
rl-server # Runs on port 8080 by default
```
#### Using RL Tools #### Using RL Tools
@ -313,10 +305,12 @@ Agent: I'll set up an RL training run on the GSM8k environment...
| `rl_select_environment` | Select an environment for training | | `rl_select_environment` | Select an environment for training |
| `rl_get_current_config` | View all configurable options | | `rl_get_current_config` | View all configurable options |
| `rl_edit_config` | Change a configuration value | | `rl_edit_config` | Change a configuration value |
| `rl_test_inference` | Test environment with OpenRouter (pre-training validation) |
| `rl_start_training` | Start a training run | | `rl_start_training` | Start a training run |
| `rl_check_status` | Check training progress | | `rl_check_status` | Check training progress |
| `rl_stop_training` | Stop a running training | | `rl_stop_training` | Stop a running training |
| `rl_get_results` | Fetch WandB metrics | | `rl_get_results` | Fetch WandB metrics |
| `rl_list_runs` | List active training runs |
#### Dedicated RL CLI #### Dedicated RL CLI
@ -434,7 +428,7 @@ skills/
If you prefer not to use the installer: If you prefer not to use the installer:
```bash ```bash
# Clone the repository # Clone the repository (with submodules)
git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
cd hermes-agent cd hermes-agent
@ -445,6 +439,11 @@ cd hermes-agent
python3 -m venv venv python3 -m venv venv
source venv/bin/activate source venv/bin/activate
pip install -e ".[all]" pip install -e ".[all]"
# Install submodules (required for terminal and RL tools)
pip install -e "./mini-swe-agent" # Terminal tool backend
pip install -e "./tinker-atropos" # RL training backend
hermes setup hermes setup
``` ```

View file

@ -665,7 +665,7 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
"models": { "models": {
"type": "array", "type": "array",
"items": {"type": "string"}, "items": {"type": "string"},
"description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, zhipu-ai/glm-4-flash, minimax/minimax-m1" "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"
} }
}, },
"required": [] "required": []
@ -730,7 +730,7 @@ def get_all_tool_names() -> List[str]:
"rl_get_current_config", "rl_edit_config", "rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status", "rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results", "rl_stop_training", "rl_get_results",
"rl_list_runs" "rl_list_runs", "rl_test_inference"
]) ])
return tool_names return tool_names
@ -898,7 +898,7 @@ def get_tool_definitions(
"rl_get_current_config", "rl_edit_config", "rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status", "rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results", "rl_stop_training", "rl_get_results",
"rl_list_runs" "rl_list_runs", "rl_test_inference"
] ]
} }
legacy_tools = legacy_map.get(toolset_name, []) legacy_tools = legacy_map.get(toolset_name, [])
@ -950,7 +950,7 @@ def get_tool_definitions(
"rl_get_current_config", "rl_edit_config", "rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status", "rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results", "rl_stop_training", "rl_get_results",
"rl_list_runs" "rl_list_runs", "rl_test_inference"
] ]
} }
legacy_tools = legacy_map.get(toolset_name, []) legacy_tools = legacy_map.get(toolset_name, [])
@ -1407,7 +1407,7 @@ def handle_function_call(
"rl_get_current_config", "rl_edit_config", "rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status", "rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results", "rl_stop_training", "rl_get_results",
"rl_list_runs" "rl_list_runs", "rl_test_inference"
]: ]:
return handle_rl_function_call(function_name, function_args) return handle_rl_function_call(function_name, function_args)

View file

@ -25,14 +25,34 @@ import sys
from pathlib import Path from pathlib import Path
import fire import fire
import yaml
# Load environment variables from .env file # Load environment variables from .env file
from dotenv import load_dotenv from dotenv import load_dotenv
env_path = Path(__file__).parent / '.env' # Load from ~/.hermes/.env first, then local .env
if env_path.exists(): hermes_env_path = Path.home() / '.hermes' / '.env'
load_dotenv(dotenv_path=env_path) local_env_path = Path(__file__).parent / '.env'
print(f"✅ Loaded environment variables from {env_path}")
if hermes_env_path.exists():
load_dotenv(dotenv_path=hermes_env_path)
print(f"✅ Loaded environment variables from {hermes_env_path}")
elif local_env_path.exists():
load_dotenv(dotenv_path=local_env_path)
print(f"✅ Loaded environment variables from {local_env_path}")
# Point the terminal tool at the tinker-atropos submodule so that shell
# commands issued during RL work run inside that checkout. When the submodule
# directory is missing, fall back to the directory containing this script.
_script_dir = Path(__file__).parent
tinker_atropos_dir = _script_dir / 'tinker-atropos'
_submodule_found = tinker_atropos_dir.exists()

os.environ['TERMINAL_CWD'] = str(tinker_atropos_dir if _submodule_found else _script_dir)
# Set in both cases: disables temp subdirectory creation.
os.environ['HERMES_QUIET'] = '1'

if _submodule_found:
    print(f"📂 Terminal working directory: {tinker_atropos_dir}")
else:
    print(f"⚠️ tinker-atropos submodule not found, using: {_script_dir}")
# Import agent and tools # Import agent and tools
from run_agent import AIAgent from run_agent import AIAgent
@ -40,6 +60,50 @@ from model_tools import get_tool_definitions, check_toolset_requirements
from tools.rl_training_tool import check_rl_api_keys, get_missing_keys from tools.rl_training_tool import check_rl_api_keys, get_missing_keys
# ============================================================================
# Config Loading
# ============================================================================
DEFAULT_MODEL = "anthropic/claude-opus-4.5"
DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"


def load_hermes_config() -> dict:
    """
    Load agent settings from ~/.hermes/config.yaml.

    The ``model`` entry may be either a plain string or a mapping with a
    ``default`` key. ``base_url`` must be a string. Missing, empty, or
    wrongly-typed values are ignored so the built-in defaults survive.

    Loading is best-effort: a missing file yields the defaults silently, and
    an unreadable or malformed file prints a warning and yields the defaults.

    Returns:
        dict: A fresh dict with the keys ``"model"`` and ``"base_url"``.
    """
    config_path = Path.home() / '.hermes' / 'config.yaml'
    config = {
        "model": DEFAULT_MODEL,
        "base_url": DEFAULT_BASE_URL,
    }

    if not config_path.exists():
        return config

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            # An empty file parses to None; treat it as an empty mapping.
            file_config = yaml.safe_load(f) or {}
    except Exception as e:
        # Deliberately broad: a broken config file must never stop the CLI.
        print(f"⚠️ Warning: Failed to load config.yaml: {e}")
        return config

    if not isinstance(file_config, dict):
        print("⚠️ Warning: Failed to load config.yaml: top-level value is not a mapping")
        return config

    # Model: accept "model: name" or "model: {default: name}".
    model = file_config.get("model")
    if isinstance(model, dict):
        model = model.get("default")
    if isinstance(model, str) and model.strip():
        config["model"] = model

    # Base URL: only override the default with a non-empty string, so that a
    # bare "base_url:" line (parsed as None) does not erase it.
    base_url = file_config.get("base_url")
    if isinstance(base_url, str) and base_url.strip():
        config["base_url"] = base_url

    return config
# ============================================================================ # ============================================================================
# RL-Specific Configuration # RL-Specific Configuration
# ============================================================================ # ============================================================================
@ -108,7 +172,7 @@ When asked to train a model, follow this workflow:
""" """
# Toolsets to enable for RL workflows # Toolsets to enable for RL workflows
RL_TOOLSETS = ["base", "terminal", "web", "rl"] RL_TOOLSETS = ["terminal", "web", "rl"]
# ============================================================================ # ============================================================================
@ -172,9 +236,9 @@ def list_environments_sync():
def main( def main(
task: str = None, task: str = None,
model: str = "anthropic/claude-sonnet-4-20250514", model: str = None,
api_key: str = None, api_key: str = None,
base_url: str = "https://openrouter.ai/api/v1", base_url: str = None,
max_iterations: int = RL_MAX_ITERATIONS, max_iterations: int = RL_MAX_ITERATIONS,
interactive: bool = False, interactive: bool = False,
list_environments: bool = False, list_environments: bool = False,
@ -187,9 +251,9 @@ def main(
Args: Args:
task: The training task/goal (e.g., "Train a model on GSM8k for math") task: The training task/goal (e.g., "Train a model on GSM8k for math")
model: Model to use for the agent (default: claude-sonnet-4) model: Model to use for the agent (reads from ~/.hermes/config.yaml if not provided)
api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided) api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided)
base_url: API base URL (default: OpenRouter) base_url: API base URL (reads from config or defaults to OpenRouter)
max_iterations: Maximum agent iterations (default: 200 for long workflows) max_iterations: Maximum agent iterations (default: 200 for long workflows)
interactive: Run in interactive mode (multiple conversations) interactive: Run in interactive mode (multiple conversations)
list_environments: Just list available RL environments and exit list_environments: Just list available RL environments and exit
@ -210,6 +274,15 @@ def main(
# Check server status # Check server status
python rl_cli.py --check-server python rl_cli.py --check-server
""" """
# Load config from ~/.hermes/config.yaml
config = load_hermes_config()
# Use config values if not explicitly provided
if model is None:
model = config["model"]
if base_url is None:
base_url = config["base_url"]
print("🎯 RL Training Agent") print("🎯 RL Training Agent")
print("=" * 60) print("=" * 60)

View file

@ -1764,10 +1764,16 @@ class AIAgent:
self._invalid_tool_retries = 0 self._invalid_tool_retries = 0
# Validate tool call arguments are valid JSON # Validate tool call arguments are valid JSON
# Handle empty strings as empty objects (common model quirk)
invalid_json_args = [] invalid_json_args = []
for tc in assistant_message.tool_calls: for tc in assistant_message.tool_calls:
args = tc.function.arguments
# Treat empty/whitespace strings as empty object
if not args or not args.strip():
tc.function.arguments = "{}"
continue
try: try:
json.loads(tc.function.arguments) json.loads(args)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
invalid_json_args.append((tc.function.name, str(e))) invalid_json_args.append((tc.function.name, str(e)))

View file

@ -150,14 +150,15 @@ function Install-Repository {
} }
} else { } else {
# Try SSH first (for private repo access), fall back to HTTPS # Try SSH first (for private repo access), fall back to HTTPS
# Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
Write-Info "Trying SSH clone..." Write-Info "Trying SSH clone..."
$sshResult = git clone --branch $Branch $RepoUrlSsh $InstallDir 2>&1 $sshResult = git clone --branch $Branch --recurse-submodules $RepoUrlSsh $InstallDir 2>&1
if ($LASTEXITCODE -eq 0) { if ($LASTEXITCODE -eq 0) {
Write-Success "Cloned via SSH" Write-Success "Cloned via SSH"
} else { } else {
Write-Info "SSH failed, trying HTTPS..." Write-Info "SSH failed, trying HTTPS..."
$httpsResult = git clone --branch $Branch $RepoUrlHttps $InstallDir 2>&1 $httpsResult = git clone --branch $Branch --recurse-submodules $RepoUrlHttps $InstallDir 2>&1
if ($LASTEXITCODE -eq 0) { if ($LASTEXITCODE -eq 0) {
Write-Success "Cloned via HTTPS" Write-Success "Cloned via HTTPS"
@ -171,6 +172,13 @@ function Install-Repository {
} }
} }
# Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
Push-Location $InstallDir
git submodule update --init --recursive
Pop-Location
Write-Success "Submodules ready"
Write-Success "Repository ready" Write-Success "Repository ready"
} }
@ -208,15 +216,43 @@ function Install-Dependencies {
& .\venv\Scripts\Activate.ps1 & .\venv\Scripts\Activate.ps1
} }
# Install main package
try { try {
pip install -e ".[all]" 2>&1 | Out-Null pip install -e ".[all]" 2>&1 | Out-Null
} catch { } catch {
pip install -e "." | Out-Null pip install -e "." | Out-Null
} }
Write-Success "Main package installed"
# Install submodules
Write-Info "Installing mini-swe-agent (terminal tool backend)..."
if (Test-Path "mini-swe-agent\pyproject.toml") {
try {
pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
Write-Success "mini-swe-agent installed"
} catch {
Write-Warning "mini-swe-agent install failed (terminal tools may not work)"
}
} else {
Write-Warning "mini-swe-agent not found (run: git submodule update --init)"
}
Write-Info "Installing tinker-atropos (RL training backend)..."
if (Test-Path "tinker-atropos\pyproject.toml") {
try {
pip install -e ".\tinker-atropos" 2>&1 | Out-Null
Write-Success "tinker-atropos installed"
} catch {
Write-Warning "tinker-atropos install failed (RL tools may not work)"
}
} else {
Write-Warning "tinker-atropos not found (run: git submodule update --init)"
}
Pop-Location Pop-Location
Write-Success "Dependencies installed" Write-Success "All dependencies installed"
} }
function Set-PathVariable { function Set-PathVariable {

View file

@ -292,12 +292,13 @@ clone_repo() {
fi fi
else else
# Try SSH first (for private repo access), fall back to HTTPS # Try SSH first (for private repo access), fall back to HTTPS
# Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
log_info "Trying SSH clone..." log_info "Trying SSH clone..."
if git clone --branch "$BRANCH" "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
log_success "Cloned via SSH" log_success "Cloned via SSH"
else else
log_info "SSH failed, trying HTTPS..." log_info "SSH failed, trying HTTPS..."
if git clone --branch "$BRANCH" "$REPO_URL_HTTPS" "$INSTALL_DIR"; then if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
log_success "Cloned via HTTPS" log_success "Cloned via HTTPS"
else else
log_error "Failed to clone repository" log_error "Failed to clone repository"
@ -310,6 +311,12 @@ clone_repo() {
fi fi
cd "$INSTALL_DIR" cd "$INSTALL_DIR"
# Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
log_info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
git submodule update --init --recursive
log_success "Submodules ready"
log_success "Repository ready" log_success "Repository ready"
} }
@ -343,10 +350,29 @@ install_deps() {
source venv/bin/activate source venv/bin/activate
fi fi
# Install the package in editable mode with all extras # Install the main package in editable mode with all extras
pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
log_success "Dependencies installed" log_success "Main package installed"
# Install submodules
log_info "Installing mini-swe-agent (terminal tool backend)..."
if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
pip install -e "./mini-swe-agent" > /dev/null 2>&1 || log_warn "mini-swe-agent install failed (terminal tools may not work)"
log_success "mini-swe-agent installed"
else
log_warn "mini-swe-agent not found (run: git submodule update --init)"
fi
log_info "Installing tinker-atropos (RL training backend)..."
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
pip install -e "./tinker-atropos" > /dev/null 2>&1 || log_warn "tinker-atropos install failed (RL tools may not work)"
log_success "tinker-atropos installed"
else
log_warn "tinker-atropos not found (run: git submodule update --init)"
fi
log_success "All dependencies installed"
} }
setup_path() { setup_path() {

View file

@ -37,6 +37,7 @@ import subprocess
import sys import sys
import time import time
import uuid import uuid
from datetime import datetime
import yaml import yaml
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
@ -84,6 +85,7 @@ LOCKED_FIELDS = {
"weight": 1.0, "weight": 1.0,
"num_requests_for_eval": 256, "num_requests_for_eval": 256,
"timeout": 3600, "timeout": 3600,
"server_type": "sglang", # Tinker uses sglang for actual training
} }
], ],
"tinker": { "tinker": {
@ -211,6 +213,9 @@ def _scan_environments() -> List[EnvironmentInfo]:
def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]: def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
""" """
Dynamically import an environment and extract its config fields. Dynamically import an environment and extract its config fields.
Uses config_init() to get the actual config class, with fallback to
directly importing BaseEnvConfig if config_init fails.
""" """
try: try:
# Load the environment module # Load the environment module
@ -230,15 +235,38 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
if not env_class: if not env_class:
return {} return {}
# Call config_init to get the actual config # Try calling config_init to get the actual config class
env_config, server_configs = env_class.config_init() config_class = None
config_class = type(env_config) try:
env_config, server_configs = env_class.config_init()
config_class = type(env_config)
except Exception as config_error:
# Fallback: try to import BaseEnvConfig directly from atroposlib
print(f"Note: config_init failed ({config_error}), using BaseEnvConfig defaults")
try:
from atroposlib.envs.base import BaseEnvConfig
config_class = BaseEnvConfig
except ImportError:
return {}
if not config_class:
return {}
def make_serializable(val):
    """
    Convert a config default into a JSON-friendly value.

    Checks are applied in order:
      1. ``None`` is returned unchanged.
      2. Anything exposing ``.value`` (e.g. an Enum member) yields that value.
      3. Anything exposing ``.name`` whose type string mentions ``Enum``
         yields that name.
      4. Every other value is returned unchanged.
    """
    if val is None:
        return None
    if hasattr(val, 'value'):  # Enum members and enum-like wrappers
        return val.value
    if hasattr(val, 'name') and 'Enum' in str(type(val)):
        return val.name
    return val
# Extract fields from the Pydantic model # Extract fields from the Pydantic model
fields = {} fields = {}
for field_name, field_info in config_class.model_fields.items(): for field_name, field_info in config_class.model_fields.items():
field_type = field_info.annotation field_type = field_info.annotation
default = field_info.default default = make_serializable(field_info.default)
description = field_info.description or "" description = field_info.description or ""
is_locked = field_name in LOCKED_FIELD_NAMES is_locked = field_name in LOCKED_FIELD_NAMES
@ -248,12 +276,15 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
if hasattr(field_type, "__origin__"): if hasattr(field_type, "__origin__"):
type_name = str(field_type) type_name = str(field_type)
locked_value = LOCKED_FIELDS.get("env", {}).get(field_name, default)
current_value = make_serializable(locked_value) if is_locked else default
fields[field_name] = { fields[field_name] = {
"type": type_name, "type": type_name,
"default": default if default is not None else None, "default": default,
"description": description, "description": description,
"locked": is_locked, "locked": is_locked,
"current_value": LOCKED_FIELDS.get("env", {}).get(field_name, default) if is_locked else default, "current_value": current_value,
} }
return fields return fields
@ -315,7 +346,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
trainer_log_file = open(trainer_log, "w") trainer_log_file = open(trainer_log, "w")
run_state.trainer_process = subprocess.Popen( run_state.trainer_process = subprocess.Popen(
["python", "launch_training.py", "--config", str(config_path)], [sys.executable, "launch_training.py", "--config", str(config_path)],
stdout=trainer_log_file, stdout=trainer_log_file,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
cwd=str(TINKER_ATROPOS_ROOT), cwd=str(TINKER_ATROPOS_ROOT),
@ -355,7 +386,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
env_log_file = open(env_log, "w") env_log_file = open(env_log, "w")
run_state.env_process = subprocess.Popen( run_state.env_process = subprocess.Popen(
["python", str(env_info.file_path), "serve", "--config", str(config_path)], [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
stdout=env_log_file, stdout=env_log_file,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
cwd=str(TINKER_ATROPOS_ROOT), cwd=str(TINKER_ATROPOS_ROOT),
@ -543,17 +574,14 @@ async def rl_select_environment(name: str) -> str:
if not field_info.get("locked", False): if not field_info.get("locked", False):
_current_config[field_name] = field_info.get("default") _current_config[field_name] = field_info.get("default")
configurable_count = sum(1 for f in config_fields.values() if not f.get("locked", False)) # Auto-set wandb_name to "{env_name}-DATETIME" to avoid overlaps
locked_count = sum(1 for f in config_fields.values() if f.get("locked", False)) timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
_current_config["wandb_name"] = f"{name}-{timestamp}"
return json.dumps({ return json.dumps({
"message": f"Selected environment: {name}", "message": f"Selected environment: {name}",
"environment": name, "environment": name,
"file_path": env_info.file_path, "file_path": env_info.file_path,
"configurable_fields": configurable_count,
"locked_fields": locked_count,
"config": _current_config,
"tip": f"Use rl_get_current_config() to see all {configurable_count} configurable fields.",
}, indent=2) }, indent=2)
@ -961,10 +989,11 @@ async def rl_list_runs() -> str:
# ============================================================================ # ============================================================================
# Test models at different scales for robustness testing # Test models at different scales for robustness testing
# These are cheap, capable models on OpenRouter for testing parsing/scoring
TEST_MODELS = [ TEST_MODELS = [
{"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"}, {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
{"id": "zhipu-ai/glm-4-flash", "name": "GLM-4 Flash", "scale": "medium"}, {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
{"id": "minimax/minimax-m1", "name": "MiniMax M1", "scale": "large"}, {"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"},
] ]
# Default test parameters - quick but representative # Default test parameters - quick but representative
@ -1066,18 +1095,35 @@ async def rl_test_inference(
# Build the process command using Atropos's built-in CLI # Build the process command using Atropos's built-in CLI
# This runs the environment's actual code with OpenRouter as the inference backend # This runs the environment's actual code with OpenRouter as the inference backend
# We pass our locked settings + test-specific overrides via CLI args
cmd = [ cmd = [
"python", env_info.file_path, "process", sys.executable, env_info.file_path, "process",
# Test-specific overrides
"--env.total_steps", str(num_steps), "--env.total_steps", str(num_steps),
"--env.group_size", str(group_size), "--env.group_size", str(group_size),
"--env.use_wandb", "false", "--env.use_wandb", "false", # No wandb for quick tests
"--env.data_path_to_save_groups", str(output_file), "--env.data_path_to_save_groups", str(output_file),
# Use locked settings from our config
"--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"],
"--env.max_token_length", str(LOCKED_FIELDS["env"]["max_token_length"]),
"--env.max_num_workers", str(LOCKED_FIELDS["env"]["max_num_workers"]),
"--env.max_batches_offpolicy", str(LOCKED_FIELDS["env"]["max_batches_offpolicy"]),
# OpenRouter config for inference testing
# IMPORTANT: Use server_type=openai for OpenRouter (not sglang)
# sglang is only for actual training with Tinker's inference server
"--openai.base_url", "https://openrouter.ai/api/v1", "--openai.base_url", "https://openrouter.ai/api/v1",
"--openai.api_key", api_key, "--openai.api_key", api_key,
"--openai.model_name", model_id, "--openai.model_name", model_id,
"--openai.server_type", "openai", # OpenRouter is OpenAI-compatible
"--openai.health_check", "false", # OpenRouter doesn't have health endpoint
] ]
print(f"Running: python {Path(env_info.file_path).name} process ...") # Debug: Print the full command
cmd_str = " ".join(str(c) for c in cmd)
# Hide API key in printed output
cmd_display = cmd_str.replace(api_key, "***API_KEY***")
print(f"Command: {cmd_display}")
print(f"Working dir: {TINKER_ATROPOS_ROOT}")
print(f" {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts") print(f" {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts")
model_results = { model_results = {
@ -1105,12 +1151,44 @@ async def rl_test_inference(
timeout=600, # 10 minute timeout per model timeout=600, # 10 minute timeout per model
) )
# Decode output
stdout_text = stdout.decode() if stdout else ""
stderr_text = stderr.decode() if stderr else ""
# Write logs to files for inspection outside CLI
log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log"
with open(log_file, "w") as f:
f.write(f"Command: {cmd_display}\n")
f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
f.write(f"Return code: {process.returncode}\n")
f.write(f"\n{'='*60}\n")
f.write(f"STDOUT:\n{'='*60}\n")
f.write(stdout_text or "(empty)\n")
f.write(f"\n{'='*60}\n")
f.write(f"STDERR:\n{'='*60}\n")
f.write(stderr_text or "(empty)\n")
print(f" Log file: {log_file}")
# Print to console for immediate debugging
if stdout_text.strip():
print(f"\n--- STDOUT ---")
print(stdout_text[-2000:]) # Last 2000 chars
if stderr_text.strip():
print(f"\n--- STDERR ---")
print(stderr_text[-2000:]) # Last 2000 chars
if process.returncode != 0: if process.returncode != 0:
model_results["error"] = f"Process exited with code {process.returncode}" model_results["error"] = f"Process exited with code {process.returncode}"
model_results["stderr"] = stderr.decode()[-1000:] model_results["stderr"] = stderr_text[-1000:]
print(f" Error: {model_results['error']}") model_results["stdout"] = stdout_text[-1000:]
model_results["log_file"] = str(log_file)
print(f"\n ❌ Error: {model_results['error']}")
else: else:
print(f" Process completed successfully") print(f"\n ✅ Process completed successfully")
print(f" Output file: {output_file}")
print(f" File exists: {output_file.exists()}")
# Parse the output JSONL file # Parse the output JSONL file
if output_file.exists(): if output_file.exists():