Add skills tools and enhance model integration

- Introduced new skills tools: `skills_categories`, `skills_list`, and `skill_view` in `model_tools.py`, allowing for better organization of and access to skill-related functionality.
- Updated `toolsets.py` to include a new `skills` toolset, providing a dedicated space for skill tools.
- Enhanced `batch_runner.py` to recognize and validate skills tools during batch processing.
- Added comprehensive tool definitions for skills tools, ensuring compatibility with OpenAI's expected format.
- Created new shell script `test_skills_kimi.sh` for testing skills tool functionality with Kimi K2.5.
- Added example skill files demonstrating the structure and usage of skills within the Hermes-Agent framework, including `SKILL.md` files for the example and audiocraft skills.
- Improved documentation for skills tools and their integration into the existing tool framework, ensuring clarity for future development and usage.
2026-01-30 07:39:55 +00:00 · 2026-01-30 07:39:55 +00:00 · f172f7d4aa
commit f172f7d4aa
parent 8e8b6be690
189 changed files with 116214 additions and 2 deletions
--- a/batch_runner.py
+++ b/batch_runner.py
@ -51,6 +51,8 @@ _WORKER_CONFIG = {}
 ALL_POSSIBLE_TOOLS = {
    'terminal', 'web_search', 'web_extract',
    'vision_analyze', 'image_generate', 'mixture_of_agents',
    # Skills tools
    'skills_categories', 'skills_list', 'skill_view',
    # Browser automation tools
    'browser_navigate', 'browser_snapshot', 'browser_click',
    'browser_type', 'browser_scroll', 'browser_back',
@ -835,6 +837,8 @@ class BatchRunner:
        VALID_TOOLS = {'web_search', 'web_extract', 'terminal', 'vision_analyze', 
                       'image_generate', 'mixture_of_agents',
                       # Skills tools
                       'skills_categories', 'skills_list', 'skill_view',
                       # Browser automation tools
                       'browser_navigate', 'browser_snapshot', 'browser_click',
                       'browser_type', 'browser_scroll', 'browser_back',
--- a/configs/test_skills_kimi.sh
+++ b/configs/test_skills_kimi.sh
@ -0,0 +1,21 @@
 #!/bin/bash
 # Test skills tool with Kimi K2.5
 # Usage: ./configs/test_skills_kimi.sh "your query here"
 # Example: ./configs/test_skills_kimi.sh "List available skills and show me the vllm skill"
 # Stop on the first failing command, on unset variables, and on pipeline errors
 set -euo pipefail
 # run_agent.py is invoked by relative path, so switch to the repository root
 # (the parent of this script's configs/ directory) to work from any directory
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR/.."
 # Default query if none provided
 QUERY="${1:-List all available skills. Then show me the axolotl skill and view one of its reference files.}"
 echo "🎯 Testing Skills Tool with Kimi K2.5"
 echo "📝 Query: $QUERY"
 echo "="
 # Only the skills toolset is enabled so the run exercises the skills tools
 python run_agent.py \
  --enabled_toolsets=skills \
  --model="moonshotai/kimi-k2.5" \
  --base_url="https://openrouter.ai/api/v1" \
  --max_turns=10 \
  --verbose \
  --save_sample \
  --query="$QUERY"
--- a/example-skill/SKILL.md
+++ b/example-skill/SKILL.md
@ -0,0 +1,70 @@
 ---
 name: example-skill
 description: An example skill demonstrating the skill file format and structure
 ---
 # Example Skill
 This is an example skill file that demonstrates how to create skills for the Hermes Agent.
 ## Skill File Format
 Skills are markdown files with YAML frontmatter at the top:
 ```yaml
 ---
 name: your-skill-name
 description: A brief one-line description of what this skill does
 ---
 ```
 The frontmatter fields:
 - **name**: The identifier used to reference this skill (lowercase, hyphens for spaces)
 - **description**: A brief description shown when listing skills (keep under 200 chars)
 ## Writing Effective Skills
 ### 1. Be Specific and Actionable
 Good skills provide clear, actionable instructions:
 ```
 When reviewing code:
 1. Check for security vulnerabilities first
 2. Verify error handling is comprehensive
 3. Ensure tests cover edge cases
 ```
 ### 2. Include Examples
 Show concrete examples of what you want:
 ```python
 # Good: Descriptive variable names
 user_authentication_token = get_token()
 # Bad: Cryptic abbreviations  
 uat = gt()
 ```
 ### 3. Define When to Use
 Help the agent understand when this skill applies:
 > Use this skill when: reviewing pull requests, auditing security, or checking code quality.
 ## Skill Categories
 Consider organizing skills by purpose:
 - **Conventions**: Coding standards, API patterns, naming rules
 - **Workflows**: Step-by-step processes for deployments, reviews, releases
 - **Knowledge**: Domain-specific information, system architecture, gotchas
 - **Templates**: Boilerplate for common tasks, response formats
 ## Tips
 1. Keep the description concise - it's shown in the skills list
 2. Use headers to organize longer skills
 3. Include code examples where helpful
 4. Reference other skills if they're related
--- a/model_tools.py
+++ b/model_tools.py
@ -37,6 +37,7 @@ from tools.terminal_hecate import terminal_hecate_tool, check_hecate_requirement
 from tools.vision_tools import vision_analyze_tool, check_vision_requirements
 from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
 from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
 from tools.skills_tool import skills_categories, skills_list, skill_view, check_skills_requirements, SKILLS_TOOL_DESCRIPTION
 # Browser automation tools (agent-browser + Browserbase)
 from tools.browser_tool import (
    browser_navigate,
@ -239,6 +240,67 @@ def get_image_tool_definitions() -> List[Dict[str, Any]]:
    ]
 def get_skills_tool_definitions() -> List[Dict[str, Any]]:
    """
    Build the function-calling schemas for the three skills tools.
    The schemas follow OpenAI's tool format: each entry is a dict with
    "type": "function" and a "function" payload holding the tool name,
    its description, and a JSON-schema style "parameters" object.
    Returns:
        List[Dict]: Fresh definitions for skills_list, skills_categories
        and skill_view, in that order
    """
    def _as_function_tool(tool_name, summary, properties, required):
        # Wrap one tool's metadata in the shared {"type": "function"} envelope.
        return {
            "type": "function",
            "function": {
                "name": tool_name,
                "description": summary,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required
                }
            }
        }
    # skills_list: optional category filter, nothing required
    list_tool = _as_function_tool(
        "skills_list",
        "List available skills (name + description). Use skill_view(name) to load full content.",
        {
            "category": {
                "type": "string",
                "description": "Optional category filter (from skills_categories)"
            }
        },
        []
    )
    # skills_categories: takes no arguments at all
    categories_tool = _as_function_tool(
        "skills_categories",
        "List available skill categories. Call first if you want to discover categories, then use skills_list(category) to filter, or call skills_list if unsure.",
        {},
        []
    )
    # skill_view: the skill name is mandatory, file_path selects a linked file
    view_tool = _as_function_tool(
        "skill_view",
        "Skills allow for loading information about specific tasks and workflows, as well as scripts and templates. Load a skill's full content or access its linked files (references, templates, scripts). First call returns SKILL.md content plus a 'linked_files' dict showing available references/templates/scripts. To access those, call again with file_path parameter.",
        {
            "name": {
                "type": "string",
                "description": "The skill name (use skills_list to see available skills)"
            },
            "file_path": {
                "type": "string",
                "description": "OPTIONAL: Path to a linked file within the skill (e.g., 'references/api.md', 'templates/config.yaml', 'scripts/validate.py'). Omit to get the main SKILL.md content."
            }
        },
        ["name"]
    )
    return [list_tool, categories_tool, view_tool]
 def get_browser_tool_definitions() -> List[Dict[str, Any]]:
    """
    Get tool definitions for browser automation tools in OpenAI's expected format.
@ -280,6 +342,10 @@ def get_all_tool_names() -> List[str]:
    if check_image_generation_requirements():
        tool_names.extend(["image_generate"])
    # Skills tools
    if check_skills_requirements():
        tool_names.extend(["skills_categories", "skills_list", "skill_view"])
    # Browser automation tools
    if check_browser_requirements():
        tool_names.extend([
@ -309,6 +375,10 @@ def get_toolset_for_tool(tool_name: str) -> str:
        "vision_analyze": "vision_tools",
        "mixture_of_agents": "moa_tools",
        "image_generate": "image_tools",
        # Skills tools
        "skills_categories": "skills_tools",
        "skills_list": "skills_tools",
        "skill_view": "skills_tools",
        # Browser automation tools
        "browser_navigate": "browser_tools",
        "browser_snapshot": "browser_tools",
@ -383,6 +453,10 @@ def get_tool_definitions(
        for tool in get_image_tool_definitions():
            all_available_tools_map[tool["function"]["name"]] = tool
    if check_skills_requirements():
        for tool in get_skills_tool_definitions():
            all_available_tools_map[tool["function"]["name"]] = tool
    if check_browser_requirements():
        for tool in get_browser_tool_definitions():
            all_available_tools_map[tool["function"]["name"]] = tool
@ -399,7 +473,7 @@ def get_tool_definitions(
                print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
            else:
                # Try legacy compatibility
-                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "browser_tools"]:
+                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools"]:
                    # Map legacy names to new system
                    legacy_map = {
                        "web_tools": ["web_search", "web_extract"],
@ -407,6 +481,7 @@ def get_tool_definitions(
                        "vision_tools": ["vision_analyze"],
                        "moa_tools": ["mixture_of_agents"],
                        "image_tools": ["image_generate"],
                        "skills_tools": ["skills_categories", "skills_list", "skill_view"],
                        "browser_tools": [
                            "browser_navigate", "browser_snapshot", "browser_click",
                            "browser_type", "browser_scroll", "browser_back",
@ -440,13 +515,14 @@ def get_tool_definitions(
                print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
            else:
                # Try legacy compatibility
-                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "browser_tools"]:
+                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools"]:
                    legacy_map = {
                        "web_tools": ["web_search", "web_extract"],
                        "terminal_tools": ["terminal"],
                        "vision_tools": ["vision_analyze"],
                        "moa_tools": ["mixture_of_agents"],
                        "image_tools": ["image_generate"],
                        "skills_tools": ["skills_categories", "skills_list", "skill_view"],
                        "browser_tools": [
                            "browser_navigate", "browser_snapshot", "browser_click",
                            "browser_type", "browser_scroll", "browser_back",
@ -639,6 +715,35 @@ def handle_image_function_call(function_name: str, function_args: Dict[str, Any]
        return json.dumps({"error": f"Unknown image generation function: {function_name}"}, ensure_ascii=False)
 def handle_skills_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Dispatch a skills tool call to the matching skills function.
    Args:
        function_name (str): One of "skills_categories", "skills_list" or "skill_view"
        function_args (Dict): Arguments supplied with the call; "category" is read
            for skills_list, "name" and "file_path" for skill_view
    Returns:
        str: Whatever the selected skills function returns (presumably a JSON
        string - confirm against tools.skills_tool), or a JSON error object when
        the skill name is missing or the function name is not recognised
    """
    if function_name == "skill_view":
        skill_name = function_args.get("name", "")
        # A missing, empty or None name is rejected before the tool is touched
        if skill_name:
            return skill_view(skill_name, file_path=function_args.get("file_path"))
        return json.dumps({"error": "Skill name is required"}, ensure_ascii=False)
    if function_name == "skills_list":
        # category may be absent, in which case None is passed through
        return skills_list(category=function_args.get("category"))
    if function_name == "skills_categories":
        return skills_categories()
    # Anything else is not a skills tool
    return json.dumps({"error": f"Unknown skills function: {function_name}"}, ensure_ascii=False)
 # Browser tool handlers mapping
 BROWSER_HANDLERS = {
    "browser_navigate": browser_navigate,
@ -731,6 +836,10 @@ def handle_function_call(
        elif function_name in ["image_generate"]:
            return handle_image_function_call(function_name, function_args)
        # Route skills tools
        elif function_name in ["skills_categories", "skills_list", "skill_view"]:
            return handle_skills_function_call(function_name, function_args)
        # Route browser automation tools
        elif function_name in [
            "browser_navigate", "browser_snapshot", "browser_click",
@ -789,6 +898,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
            "description": "Generate high-quality images from text prompts using FAL.ai's FLUX.1 Krea model with automatic 2x upscaling for enhanced quality",
            "requirements": ["FAL_KEY environment variable", "fal-client package"]
        },
        "skills_tools": {
            "available": check_skills_requirements(),
            "tools": ["skills_categories", "skills_list", "skill_view"],
            "description": "Access skill documents that provide specialized instructions, guidelines, or knowledge the agent can load on demand",
            "requirements": ["skills/ directory in repo root"]
        },
        "browser_tools": {
            "available": check_browser_requirements(),
            "tools": [
@ -817,6 +932,7 @@ def check_toolset_requirements() -> Dict[str, bool]:
        "vision_tools": check_vision_requirements(),
        "moa_tools": check_moa_requirements(),
        "image_tools": check_image_generation_requirements(),
        "skills_tools": check_skills_requirements(),
        "browser_tools": check_browser_requirements()
    }
--- a/skills/mlops/accelerate/SKILL.md
+++ b/skills/mlops/accelerate/SKILL.md
@ -0,0 +1,332 @@
 ---
 name: huggingface-accelerate
 description: Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch command. HuggingFace ecosystem standard.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Distributed Training, HuggingFace, Accelerate, DeepSpeed, FSDP, Mixed Precision, PyTorch, DDP, Unified API, Simple]
 dependencies: [accelerate, torch, transformers]
 ---
 # HuggingFace Accelerate - Unified Distributed Training
 ## Quick start
 Accelerate simplifies distributed training to 4 lines of code.
 **Installation**:
 ```bash
 pip install accelerate
 ```
 **Convert PyTorch script** (4 lines):
 ```diff
 import torch
 + from accelerate import Accelerator
 + accelerator = Accelerator()
  model = torch.nn.Transformer()
  optimizer = torch.optim.Adam(model.parameters())
  dataloader = torch.utils.data.DataLoader(dataset)
 + model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
  for batch in dataloader:
      optimizer.zero_grad()
      loss = model(batch)
 -     loss.backward()
 +     accelerator.backward(loss)
      optimizer.step()
 ```
 **Run** (single command):
 ```bash
 accelerate launch train.py
 ```
 ## Common workflows
 ### Workflow 1: From single GPU to multi-GPU
 **Original script**:
 ```python
 # train.py
 import torch
 model = torch.nn.Linear(10, 2).to('cuda')
 optimizer = torch.optim.Adam(model.parameters())
 dataloader = torch.utils.data.DataLoader(dataset, batch_size=32)
 for epoch in range(10):
    for batch in dataloader:
        batch = batch.to('cuda')
        optimizer.zero_grad()
        loss = model(batch).mean()
        loss.backward()
        optimizer.step()
 ```
 **With Accelerate** (4 lines added):
 ```python
 # train.py
 import torch
 from accelerate import Accelerator  # +1
 accelerator = Accelerator()  # +2
 model = torch.nn.Linear(10, 2)
 optimizer = torch.optim.Adam(model.parameters())
 dataloader = torch.utils.data.DataLoader(dataset, batch_size=32)
 model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)  # +3
 for epoch in range(10):
    for batch in dataloader:
        # No .to('cuda') needed - automatic!
        optimizer.zero_grad()
        loss = model(batch).mean()
        accelerator.backward(loss)  # +4
        optimizer.step()
 ```
 **Configure** (interactive):
 ```bash
 accelerate config
 ```
 **Questions**:
 - Which machine? (single/multi GPU/TPU/CPU)
 - How many machines? (1)
 - Mixed precision? (no/fp16/bf16/fp8)
 - DeepSpeed? (no/yes)
 **Launch** (works on any setup):
 ```bash
 # Single GPU
 accelerate launch train.py
 # Multi-GPU (8 GPUs)
 accelerate launch --multi_gpu --num_processes 8 train.py
 # Multi-node
 accelerate launch --multi_gpu --num_processes 16 \
  --num_machines 2 --machine_rank 0 \
  --main_process_ip $MASTER_ADDR \
  train.py
 ```
 ### Workflow 2: Mixed precision training
 **Enable FP16/BF16**:
 ```python
 from accelerate import Accelerator
 # FP16 (with gradient scaling)
 accelerator = Accelerator(mixed_precision='fp16')
 # BF16 (no scaling, more stable)
 accelerator = Accelerator(mixed_precision='bf16')
 # FP8 (H100+)
 accelerator = Accelerator(mixed_precision='fp8')
 model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
 # Everything else is automatic!
 for batch in dataloader:
    with accelerator.autocast():  # Optional, done automatically
        loss = model(batch)
    accelerator.backward(loss)
 ```
 ### Workflow 3: DeepSpeed ZeRO integration
 **Enable DeepSpeed ZeRO-2**:
 ```python
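 from accelerate import Accelerator
 from accelerate.utils import DeepSpeedPlugin
 # deepspeed_plugin expects a DeepSpeedPlugin instance, not a plain settings dict
 ds_plugin = DeepSpeedPlugin(
    zero_stage=2,  # ZeRO-2
    offload_optimizer_device="none",
    gradient_accumulation_steps=4
 )
 accelerator = Accelerator(
    mixed_precision='bf16',
    deepspeed_plugin=ds_plugin
 )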
 # Same code as before!
 model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
 ```
 **Or via config**:
 ```bash
 accelerate config
 # Select: DeepSpeed → ZeRO-2
 ```
 **deepspeed_config.json**:
 ```json
 {
    "fp16": {"enabled": false},
    "bf16": {"enabled": true},
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {"device": "cpu"},
        "allgather_bucket_size": 5e8,
        "reduce_bucket_size": 5e8
    }
 }
 ```
 **Launch**:
 ```bash
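 # --config_file expects the Accelerate config written by `accelerate config`;
 # point its `deepspeed_config_file` entry at deepspeed_config.json
 accelerate launch --config_file accelerate_config.yaml train.py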
 ```
 ### Workflow 4: FSDP (Fully Sharded Data Parallel)
 **Enable FSDP**:
 ```python
 from accelerate import Accelerator, FullyShardedDataParallelPlugin
 fsdp_plugin = FullyShardedDataParallelPlugin(
    sharding_strategy="FULL_SHARD",  # ZeRO-3 equivalent
    auto_wrap_policy="TRANSFORMER_AUTO_WRAP",
    cpu_offload=False
 )
 accelerator = Accelerator(
    mixed_precision='bf16',
    fsdp_plugin=fsdp_plugin
 )
 model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
 ```
 **Or via config**:
 ```bash
 accelerate config
 # Select: FSDP → Full Shard → No CPU Offload
 ```
 ### Workflow 5: Gradient accumulation
 **Accumulate gradients**:
 ```python
 from accelerate import Accelerator
 accelerator = Accelerator(gradient_accumulation_steps=4)
 model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
 for batch in dataloader:
    with accelerator.accumulate(model):  # Handles accumulation
        loss = model(batch)
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()  # Zero after step(): zeroing first discards the accumulated gradients on the sync step
 ```
 **Effective batch size**: `batch_size * num_gpus * gradient_accumulation_steps`
 ## When to use vs alternatives
 **Use Accelerate when**:
 - Want simplest distributed training
 - Need single script for any hardware
 - Use HuggingFace ecosystem
 - Want flexibility (DDP/DeepSpeed/FSDP/Megatron)
 - Need quick prototyping
 **Key advantages**:
 - **4 lines**: Minimal code changes
 - **Unified API**: Same code for DDP, DeepSpeed, FSDP, Megatron
 - **Automatic**: Device placement, mixed precision, sharding
 - **Interactive config**: No manual launcher setup
 - **Single launch**: Works everywhere
 **Use alternatives instead**:
 - **PyTorch Lightning**: Need callbacks, high-level abstractions
 - **Ray Train**: Multi-node orchestration, hyperparameter tuning
 - **DeepSpeed**: Direct API control, advanced features
 - **Raw DDP**: Maximum control, minimal abstraction
 ## Common issues
 **Issue: Wrong device placement**
 Don't manually move to device:
 ```python
 # WRONG
 batch = batch.to('cuda')
 # CORRECT
 # Accelerate handles it automatically after prepare()
 ```
 **Issue: Gradient accumulation not working**
 Use context manager:
 ```python
 # CORRECT
 with accelerator.accumulate(model):
    accelerator.backward(loss)
    optimizer.step()
    optimizer.zero_grad()
 ```
 **Issue: Checkpointing in distributed**
 Use accelerator methods:
 ```python
 # Call save_state on every process; Accelerate coordinates which ranks write
 # (guarding it with is_main_process can hang under FSDP/DeepSpeed)
 accelerator.save_state('checkpoint/')
 # Load on all processes
 accelerator.load_state('checkpoint/')
 ```
 **Issue: Different results with FSDP**
 Ensure same random seed:
 ```python
 from accelerate.utils import set_seed
 set_seed(42)
 ```
 ## Advanced topics
 **Megatron integration**: See [references/megatron-integration.md](references/megatron-integration.md) for tensor parallelism, pipeline parallelism, and sequence parallelism setup.
 **Custom plugins**: See [references/custom-plugins.md](references/custom-plugins.md) for creating custom distributed plugins and advanced configuration.
 **Performance tuning**: See [references/performance.md](references/performance.md) for profiling, memory optimization, and best practices.
 ## Hardware requirements
 - **CPU**: Works (slow)
 - **Single GPU**: Works
 - **Multi-GPU**: DDP (default), DeepSpeed, or FSDP
 - **Multi-node**: DDP, DeepSpeed, FSDP, Megatron
 - **TPU**: Supported
 - **Apple MPS**: Supported
 **Launcher requirements**:
 - **DDP**: `torch.distributed.run` (built-in)
 - **DeepSpeed**: `deepspeed` (pip install deepspeed)
 - **FSDP**: PyTorch 1.12+ (built-in)
 - **Megatron**: Custom setup
 ## Resources
 - Docs: https://huggingface.co/docs/accelerate
 - GitHub: https://github.com/huggingface/accelerate
 - Version: 1.11.0+
 - Tutorial: "Accelerate your scripts"
 - Examples: https://github.com/huggingface/accelerate/tree/main/examples
 - Used by: HuggingFace Transformers, TRL, PEFT, all HF libraries
--- a/skills/mlops/accelerate/references/custom-plugins.md
+++ b/skills/mlops/accelerate/references/custom-plugins.md
@ -0,0 +1,453 @@
 # Custom Plugins for Accelerate
 ## Overview
 Accelerate allows creating **custom plugins** to extend distributed training strategies beyond built-in options (DDP, FSDP, DeepSpeed).
 ## Plugin Architecture
 ### Base Plugin Structure
 ```python
 from accelerate.utils import DistributedDataParallelKwargs
 from dataclasses import dataclass
@dataclass
 class CustomPlugin:
    """Custom training plugin."""
    # Plugin configuration
    param1: int = 1
    param2: str = "default"
    def __post_init__(self):
        # Validation logic
        if self.param1 < 1:
            raise ValueError("param1 must be >= 1")
 ```
 ### Using Custom Plugin
 ```python
 from accelerate import Accelerator
 # Create plugin
 custom_plugin = CustomPlugin(param1=4, param2="value")
 # Pass to Accelerator
 accelerator = Accelerator(
    custom_plugin=custom_plugin  # Not a real parameter, example only
 )
 ```
 ## Built-In Plugin Examples
 ### 1. GradScalerKwargs (FP16 Configuration)
 ```python
 from accelerate.utils import GradScalerKwargs
 # Configure gradient scaler for FP16
 scaler_kwargs = GradScalerKwargs(
    init_scale=2.**16,        # Initial loss scale
    growth_factor=2.0,        # Scale growth rate
    backoff_factor=0.5,       # Scale backoff rate
    growth_interval=2000,     # Steps between scale increases
    enabled=True              # Enable scaler
 )
 accelerator = Accelerator(
    mixed_precision='fp16',
    kwargs_handlers=[scaler_kwargs]  # Pass as kwargs handler
 )
 ```
 **Use case**: Fine-tune FP16 gradient scaling behavior
 ### 2. DistributedDataParallelKwargs
 ```python
 from accelerate.utils import DistributedDataParallelKwargs
 # Configure DDP behavior
 ddp_kwargs = DistributedDataParallelKwargs(
    bucket_cap_mb=25,                 # Gradient bucketing size
    find_unused_parameters=False,     # Find unused params (slower)
    check_reduction=False,            # Check gradient reduction
    gradient_as_bucket_view=True,     # Memory optimization
    static_graph=False                # Static computation graph
 )
 accelerator = Accelerator(
    kwargs_handlers=[ddp_kwargs]
 )
 ```
 **Use case**: Optimize DDP performance for specific models
 ### 3. FP8RecipeKwargs (H100 FP8)
 ```python
 from accelerate.utils import FP8RecipeKwargs
 # Configure FP8 training (H100)
 fp8_recipe = FP8RecipeKwargs(
    backend="te",              # TransformerEngine backend
    margin=0,                  # Scaling margin
    interval=1,                # Scaling interval
    fp8_format="HYBRID",       # E4M3 + E5M2 hybrid
    amax_history_len=1024,     # AMAX history length
    amax_compute_algo="max"    # AMAX computation algorithm
 )
 accelerator = Accelerator(
    mixed_precision='fp8',
    kwargs_handlers=[fp8_recipe]
 )
 ```
 **Use case**: Ultra-fast training on H100 GPUs
 ## Custom DeepSpeed Configuration
 ### ZeRO-3 with CPU Offload
 ```python
 from accelerate import Accelerator
 from accelerate.utils import DeepSpeedPlugin
 # Custom DeepSpeed config
 ds_plugin = DeepSpeedPlugin(
    zero_stage=3,                     # ZeRO-3
    offload_optimizer_device="cpu",   # CPU offload optimizer
    offload_param_device="cpu",       # CPU offload parameters
    zero3_init_flag=True,             # ZeRO-3 initialization
    zero3_save_16bit_model=True,      # Save FP16 weights
 )
 accelerator = Accelerator(
    deepspeed_plugin=ds_plugin,
    mixed_precision='bf16'
 )
 ```
 ### ZeRO-2 with NVMe Offload
 ```python
 ds_plugin = DeepSpeedPlugin(
    zero_stage=2,
    offload_optimizer_device="nvme",  # NVMe offload
    offload_param_device="nvme",
    nvme_path="/local_nvme",          # NVMe mount path
 )
 ```
 ### Custom JSON Config
 ```python
 import json
 # Load custom DeepSpeed config
 with open('deepspeed_config.json', 'r') as f:
    ds_config = json.load(f)
 ds_plugin = DeepSpeedPlugin(hf_ds_config=ds_config)
 accelerator = Accelerator(deepspeed_plugin=ds_plugin)
 ```
 **Example config** (`deepspeed_config.json`):
 ```json
 {
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": 1.0,
  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true
    },
    "overlap_comm": true,
    "contiguous_gradients": true,
    "sub_group_size": 1e9,
    "reduce_bucket_size": 5e8,
    "stage3_prefetch_bucket_size": 5e8,
    "stage3_param_persistence_threshold": 1e6,
    "stage3_max_live_parameters": 1e9,
    "stage3_max_reuse_distance": 1e9,
    "stage3_gather_16bit_weights_on_model_save": true
  },
  "bf16": {
    "enabled": true
  },
  "steps_per_print": 100,
  "wall_clock_breakdown": false
 }
 ```
 ## Custom FSDP Configuration
 ### FSDP with Custom Auto-Wrap Policy
 ```python
 from accelerate.utils import FullyShardedDataParallelPlugin
 from torch.distributed.fsdp import BackwardPrefetch, ShardingStrategy
 from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy
 import functools
 # Custom wrap policy (size-based)
 wrap_policy = functools.partial(
    size_based_auto_wrap_policy,
    min_num_params=1e6  # Wrap layers with 1M+ params
 )
 fsdp_plugin = FullyShardedDataParallelPlugin(
    sharding_strategy=ShardingStrategy.FULL_SHARD,  # ZeRO-3 equivalent
    backward_prefetch=BackwardPrefetch.BACKWARD_PRE,  # Prefetch strategy
    mixed_precision_policy=None,  # Use Accelerator's mixed precision
    auto_wrap_policy=wrap_policy,  # Custom wrapping
    cpu_offload=False,
    ignored_modules=None,  # Modules to not wrap
    state_dict_type="FULL_STATE_DICT",  # Save format
    optim_state_dict_config=None,
    limit_all_gathers=False,
    use_orig_params=True,  # Use original param shapes
 )
 accelerator = Accelerator(
    fsdp_plugin=fsdp_plugin,
    mixed_precision='bf16'
 )
 ```
 ### FSDP with Transformer Auto-Wrap
 ```python
 from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
 from transformers.models.gpt2.modeling_gpt2 import GPT2Block
 # Wrap at transformer block level
 wrap_policy = functools.partial(
    transformer_auto_wrap_policy,
    transformer_layer_cls={GPT2Block}  # Wrap GPT2Block layers
 )
 fsdp_plugin = FullyShardedDataParallelPlugin(
    auto_wrap_policy=wrap_policy
 )
 ```
 ## Creating Custom Training Strategy
 ### Example: Custom Gradient Accumulation
 ```python
 from accelerate import Accelerator
 class CustomGradientAccumulation:
    def __init__(self, steps=4, adaptive=False, threshold=1.0):
        self.steps = steps
        self.adaptive = adaptive
        self.threshold = threshold  # Loss level that forces an early sync
        self.current_step = 0
    def should_sync(self, loss):
        """Decide whether to sync gradients."""
        self.current_step += 1
        # Adaptive: sync on high loss
        if self.adaptive and loss > self.threshold:
            self.current_step = 0
            return True
        # Regular: sync every N steps
        if self.current_step >= self.steps:
            self.current_step = 0
            return True
        return False
 # Usage
 custom_accum = CustomGradientAccumulation(steps=8, adaptive=True)
 accelerator = Accelerator()
 for batch in dataloader:
    outputs = model(**batch)
    loss = outputs.loss
    # Scale loss
    loss = loss / custom_accum.steps
    accelerator.backward(loss)
    # Conditional sync
    if custom_accum.should_sync(loss.item()):
        optimizer.step()
        optimizer.zero_grad()
 ```
 ### Example: Custom Mixed Precision
 ```python
 import torch
 class CustomMixedPrecision:
    """Custom mixed precision with dynamic loss scaling."""
    def __init__(self, init_scale=2**16, scale_window=2000):
        self.scaler = torch.cuda.amp.GradScaler(
            init_scale=init_scale,
            growth_interval=scale_window
        )
        self.scale_history = []
    def scale_loss(self, loss):
        """Scale loss for backward."""
        return self.scaler.scale(loss)
    def unscale_and_clip(self, optimizer, max_norm=1.0):
        """Unscale gradients and clip."""
        self.scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(
            optimizer.param_groups[0]['params'],
            max_norm
        )
    def step(self, optimizer):
        """Optimizer step with scaler update."""
        scale_before = self.scaler.get_scale()
        self.scaler.step(optimizer)
        self.scaler.update()
        scale_after = self.scaler.get_scale()
        # Track scale changes
        if scale_before != scale_after:
            self.scale_history.append(scale_after)
 # Usage
 custom_mp = CustomMixedPrecision()
 for batch in dataloader:
    with torch.cuda.amp.autocast(dtype=torch.float16):
        loss = model(**batch).loss
    scaled_loss = custom_mp.scale_loss(loss)
    scaled_loss.backward()
    custom_mp.unscale_and_clip(optimizer, max_norm=1.0)
    custom_mp.step(optimizer)
    optimizer.zero_grad()
 ```
 ## Advanced: Custom Distributed Backend
 ### Custom AllReduce Strategy
 ```python
 import torch.distributed as dist
 class CustomAllReduce:
    """Custom all-reduce with compression."""
    def __init__(self, compression_ratio=0.1):
        self.compression_ratio = compression_ratio
    def compress_gradients(self, tensor):
        """Top-k gradient compression."""
        k = int(tensor.numel() * self.compression_ratio)
        values, indices = torch.topk(tensor.abs().view(-1), k)
        return values, indices
    def all_reduce_compressed(self, tensor):
        """All-reduce with gradient compression."""
        # Compress
        values, indices = self.compress_gradients(tensor)
        # All-reduce compressed gradients
        dist.all_reduce(values, op=dist.ReduceOp.SUM)
        # Decompress
        tensor_compressed = torch.zeros_like(tensor).view(-1)
        tensor_compressed[indices] = values / dist.get_world_size()
        return tensor_compressed.view_as(tensor)
 # Usage in training loop
 custom_ar = CustomAllReduce(compression_ratio=0.1)
 for batch in dataloader:
    loss = model(**batch).loss
    loss.backward()
    # Custom all-reduce
    for param in model.parameters():
        if param.grad is not None:
            param.grad.data = custom_ar.all_reduce_compressed(param.grad.data)
    optimizer.step()
    optimizer.zero_grad()
 ```
 ## Plugin Best Practices
 ### 1. Validation in `__post_init__`
 ```python
@dataclass
class CustomPlugin:
    """Plugin whose hyperparameters are validated as soon as it is built."""
    learning_rate: float = 1e-3
    warmup_steps: int = 1000
    def __post_init__(self):
        """Reject invalid settings, then derive values that depend on them."""
        lr = self.learning_rate
        warmup = self.warmup_steps
        # Fail fast: a bad value should surface at construction time,
        # not deep inside the training loop.
        if lr <= 0:
            raise ValueError("learning_rate must be positive")
        if warmup < 0:
            raise ValueError("warmup_steps must be non-negative")
        # Derived value: learning-rate floor at 10% of the base rate
        self.min_lr = lr * 0.1
 ```
 ### 2. Compatibility Checks
 ```python
@dataclass
class CustomPlugin:
    """Plugin that can verify it works with a given accelerator setup."""
    feature_enabled: bool = True
    def is_compatible(self, accelerator):
        """Return True if usable with ``accelerator``; raise ValueError if not."""
        # With the feature off there is nothing that can conflict.
        if not self.feature_enabled:
            return True
        if accelerator.mixed_precision == 'fp8':
            raise ValueError("Custom plugin not compatible with FP8")
        return True
 ```
 ### 3. State Management
 ```python
from dataclasses import dataclass, field
@dataclass
class CustomPlugin:
    """Plugin that carries mutable state across training steps."""
    counter: int = 0
    # default_factory gives every instance its own list; a shared mutable
    # default would leak history between plugins.
    history: list = field(default_factory=list)
    def __post_init__(self):
        # Still accept an explicit ``history=None`` from existing callers.
        if self.history is None:
            self.history = []
    def update_state(self, value):
        """Update plugin state during training."""
        self.counter += 1
        self.history.append(value)
 ```
 ## Resources
 - Accelerate Plugins: https://huggingface.co/docs/accelerate/package_reference/kwargs
 - DeepSpeed Config: https://www.deepspeed.ai/docs/config-json/
 - FSDP Guide: https://pytorch.org/docs/stable/fsdp.html
 - Custom Training Loops: https://huggingface.co/docs/accelerate/usage_guides/training_tpu
--- a/skills/mlops/accelerate/references/megatron-integration.md
+++ b/skills/mlops/accelerate/references/megatron-integration.md
@ -0,0 +1,489 @@
 # Megatron Integration with Accelerate
 ## Overview
 Accelerate supports Megatron-LM for massive model training with tensor parallelism and pipeline parallelism.
 **Megatron capabilities**:
 - **Tensor Parallelism (TP)**: Split layers across GPUs
 - **Pipeline Parallelism (PP)**: Split model depth across GPUs
 - **Data Parallelism (DP)**: Replicate model across GPU groups
 - **Sequence Parallelism**: Split sequences for long contexts
 ## Setup
 ### Install Megatron-LM
 ```bash
 # Clone Megatron-LM repository
 git clone https://github.com/NVIDIA/Megatron-LM.git
 cd Megatron-LM
 pip install -e .
 # Install Apex (NVIDIA optimizations)
 git clone https://github.com/NVIDIA/apex
 cd apex
 pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
  --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
 ```
 ### Accelerate Configuration
 ```bash
 accelerate config
 ```
 **Questions**:
 ```
 In which compute environment are you running?
 > This machine
 Which type of machine are you using?
 > Multi-GPU
 How many different machines will you use?
 > 1
 Do you want to use DeepSpeed/FSDP?
 > No
 Do you want to use Megatron-LM?
 > Yes
 What is the Tensor Parallelism degree? [1-8]
 > 2
 Do you want to enable Sequence Parallelism?
 > No
 What is the Pipeline Parallelism degree? [1-8]
 > 2
 What is the Data Parallelism degree? [1-8]
 > 2
 Where to perform activation checkpointing? ['SELECTIVE', 'FULL', 'NONE']
 > SELECTIVE
 Where to perform activation partitioning? ['SEQUENTIAL', 'UNIFORM']
 > SEQUENTIAL
 ```
 **Generated config** (`~/.cache/huggingface/accelerate/default_config.yaml`):
 ```yaml
 compute_environment: LOCAL_MACHINE
 distributed_type: MEGATRON_LM
 downcast_bf16: 'no'
 machine_rank: 0
 main_training_function: main
 megatron_lm_config:
  megatron_lm_gradient_clipping: 1.0
  megatron_lm_learning_rate_decay_iters: 320000
  megatron_lm_num_micro_batches: 1
  megatron_lm_pp_degree: 2
  megatron_lm_recompute_activations: true
  megatron_lm_sequence_parallelism: false
  megatron_lm_tp_degree: 2
 mixed_precision: bf16
 num_machines: 1
 num_processes: 8
 rdzv_backend: static
 same_network: true
 tpu_env: []
 tpu_use_cluster: false
 tpu_use_sudo: false
 use_cpu: false
 ```
 ## Parallelism Strategies
 ### Tensor Parallelism (TP)
 **Splits each transformer layer across GPUs**:
 ```python
 # Layer split across 2 GPUs
 # GPU 0: First half of attention heads
 # GPU 1: Second half of attention heads
 # Each GPU computes partial outputs
 # All-reduce combines results
 ```
 **TP degree recommendations**:
 - **TP=1**: No tensor parallelism (single GPU per layer)
 - **TP=2**: 2 GPUs per layer (good for 7-13B models)
 - **TP=4**: 4 GPUs per layer (good for 20-40B models)
 - **TP=8**: 8 GPUs per layer (good for 70B+ models)
 **Benefits**:
 - Reduces memory per GPU
 - All-reduce communication (fast)
 **Drawbacks**:
 - Requires fast inter-GPU bandwidth (NVLink)
 - Communication overhead per layer
 ### Pipeline Parallelism (PP)
 **Splits model depth across GPUs**:
 ```python
 # 12-layer model, PP=4
 # GPU 0: Layers 0-2
 # GPU 1: Layers 3-5
 # GPU 2: Layers 6-8
 # GPU 3: Layers 9-11
 ```
 **PP degree recommendations**:
 - **PP=1**: No pipeline parallelism
 - **PP=2**: 2 pipeline stages (good for 20-40B models)
 - **PP=4**: 4 pipeline stages (good for 70B+ models)
 - **PP=8**: 8 pipeline stages (good for 175B+ models)
 **Benefits**:
 - Near-linear reduction in model memory per GPU (PP=4 → roughly 4× less)
 - Works across nodes (slower interconnect OK)
 **Drawbacks**:
 - Pipeline bubbles (idle time)
 - Requires micro-batching
 ### Data Parallelism (DP)
 **Replicates model across GPU groups**:
 ```python
 # 8 GPUs, TP=2, PP=2, DP=2
 # Group 0 (GPUs 0-3): Full model replica
 # Group 1 (GPUs 4-7): Full model replica
 ```
 **DP degree**:
 - `DP = total_gpus / (TP × PP)`
 - Example: 8 GPUs, TP=2, PP=2 → DP=2
 **Benefits**:
 - Increases throughput
 - Scales batch size
 ### Sequence Parallelism
 **Splits long sequences across GPUs** (extends TP):
 ```python
 # 8K sequence, TP=2, Sequence Parallel=True
 # GPU 0: Tokens 0-4095
 # GPU 1: Tokens 4096-8191
 ```
 **Benefits**:
 - Enables very long sequences (100K+ tokens)
 - Reduces activation memory
 **Requirements**:
 - Must use with TP > 1
 - RoPE/ALiBi position encodings work best
 ## Accelerate Code Example
 ### Basic Setup
 ```python
 from accelerate import Accelerator
 from accelerate.utils import MegatronLMPlugin
 # Configure Megatron
 megatron_plugin = MegatronLMPlugin(
    tp_degree=2,              # Tensor parallelism degree
    pp_degree=2,              # Pipeline parallelism degree
    num_micro_batches=4,      # Micro-batches for pipeline
    gradient_clipping=1.0,    # Gradient clipping value
    sequence_parallelism=False,  # Enable sequence parallelism
    recompute_activations=True,  # Activation checkpointing
    use_distributed_optimizer=True,  # Distributed optimizer
    custom_prepare_model_function=None,  # Custom model prep
 )
 # Initialize accelerator
 accelerator = Accelerator(
    mixed_precision='bf16',
    megatron_lm_plugin=megatron_plugin
 )
 # Prepare model and optimizer
 model, optimizer, train_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader
 )
 # Training loop (same as DDP!)
 for batch in train_dataloader:
    optimizer.zero_grad()
    outputs = model(**batch)
    loss = outputs.loss
    accelerator.backward(loss)
    optimizer.step()
 ```
 ### Full Training Script
 ```python
 import torch
 from accelerate import Accelerator
 from accelerate.utils import MegatronLMPlugin
 from transformers import GPT2Config, GPT2LMHeadModel
 def main():
    """Train a GPT-2 style model with Megatron-LM tensor/pipeline parallelism."""
    num_epochs = 3
    # Megatron configuration
    megatron_plugin = MegatronLMPlugin(
        tp_degree=2,
        pp_degree=2,
        num_micro_batches=4,
        gradient_clipping=1.0,
    )
    accelerator = Accelerator(
        mixed_precision='bf16',
        gradient_accumulation_steps=8,
        megatron_lm_plugin=megatron_plugin
    )
    # Model
    config = GPT2Config(
        n_layer=24,
        n_head=16,
        n_embd=1024,
    )
    model = GPT2LMHeadModel(config)
    # Optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=6e-4)
    # Data: synthetic token ids so the script runs end to end.
    # Replace this with your own tokenized dataset.
    token_ids = torch.randint(0, config.vocab_size, (1024, 512))
    train_loader = torch.utils.data.DataLoader(
        [{"input_ids": ids, "labels": ids} for ids in token_ids],
        batch_size=4,
        shuffle=True,
    )
    # Prepare
    model, optimizer, train_loader = accelerator.prepare(
        model, optimizer, train_loader
    )
    # Training loop
    for epoch in range(num_epochs):
        for batch in train_loader:
            with accelerator.accumulate(model):
                outputs = model(**batch)
                loss = outputs.loss
                accelerator.backward(loss)
                optimizer.step()
                optimizer.zero_grad()
        # Save checkpoint
        accelerator.wait_for_everyone()
        accelerator.save_state(f'checkpoint-epoch-{epoch}')
 if __name__ == '__main__':
    main()
 ```
 ### Launch Command
 ```bash
 # 8 GPUs, TP=2, PP=2, DP=2
 accelerate launch --multi_gpu --num_processes 8 train.py
 # Multi-node (2 nodes, 8 GPUs each)
 # Node 0
 accelerate launch --multi_gpu --num_processes 16 \
  --num_machines 2 --machine_rank 0 \
  --main_process_ip $MASTER_ADDR \
  --main_process_port 29500 \
  train.py
 # Node 1
 accelerate launch --multi_gpu --num_processes 16 \
  --num_machines 2 --machine_rank 1 \
  --main_process_ip $MASTER_ADDR \
  --main_process_port 29500 \
  train.py
 ```
 ## Activation Checkpointing
 **Reduces memory by recomputing activations**:
 ```python
 megatron_plugin = MegatronLMPlugin(
    recompute_activations=True,      # Enable checkpointing
    checkpoint_num_layers=1,         # Checkpoint every N layers
    distribute_checkpointed_activations=True,  # Distribute across TP
    partition_activations=True,      # Partition in PP
    check_for_nan_in_loss_and_grad=True,  # Stability check
 )
 ```
 **Strategies**:
 - `SELECTIVE`: Checkpoint transformer blocks only
 - `FULL`: Checkpoint all layers
 - `NONE`: No checkpointing
 **Memory savings**: 30-50% with 10-15% slowdown
 ## Distributed Optimizer
 **Shards optimizer state across DP ranks**:
 ```python
 megatron_plugin = MegatronLMPlugin(
    use_distributed_optimizer=True,  # Enable sharded optimizer
 )
 ```
 **Benefits**:
 - Reduces optimizer memory by DP degree
 - Example: DP=4 → 4× less optimizer memory per GPU
 **Compatible with**:
 - AdamW, Adam, SGD
 - Mixed precision training
 ## Performance Tuning
 ### Micro-Batch Size
 ```python
 # Pipeline parallelism requires micro-batching
 megatron_plugin = MegatronLMPlugin(
    pp_degree=4,
    num_micro_batches=16,  # 16 micro-batches per pipeline
 )
 # Effective batch = num_micro_batches × micro_batch_size × DP
 # Example: 16 × 2 × 4 = 128
 ```
 **Recommendations**:
 - More micro-batches → less pipeline bubble
 - Typical: 4-16 micro-batches
 ### Sequence Length
 ```python
 # For long sequences, enable sequence parallelism
 megatron_plugin = MegatronLMPlugin(
    tp_degree=4,
    sequence_parallelism=True,  # Required: TP > 1
 )
 # Enables sequences up to TP × normal limit
 # Example: TP=4, 8K normal → 32K with sequence parallel
 ```
 ### GPU Topology
 **NVLink required for TP**:
 ```bash
 # Check NVLink topology
 nvidia-smi topo -m
 # Good topology (NVLink between all GPUs)
 # GPU0 - GPU1: NV12 (fast)
 # GPU0 - GPU2: NV12 (fast)
 # Bad topology (PCIe only)
 # GPU0 - GPU4: PHB (slow, avoid TP across these)
 ```
 **Recommendations**:
 - **TP**: Within same node (NVLink)
 - **PP**: Across nodes (slower interconnect OK)
 - **DP**: Any topology
 ## Model Size Guidelines
 | Model Size | GPUs | TP | PP | DP | Micro-Batches |
 |------------|------|----|----|----|--------------|
 | 7B | 8 | 1 | 1 | 8 | 1 |
 | 13B | 8 | 2 | 1 | 4 | 1 |
 | 20B | 16 | 4 | 1 | 4 | 1 |
 | 40B | 32 | 4 | 2 | 4 | 4 |
 | 70B | 64 | 8 | 2 | 4 | 8 |
 | 175B | 128 | 8 | 4 | 4 | 16 |
 **Assumptions**: BF16, 2K sequence length, A100 80GB
 ## Checkpointing
 ### Save Checkpoint
 ```python
 # Save full model state
 accelerator.save_state('checkpoint-1000')
 # Megatron saves separate files per rank
 # checkpoint-1000/
 #   pytorch_model_tp_0_pp_0.bin
 #   pytorch_model_tp_0_pp_1.bin
 #   pytorch_model_tp_1_pp_0.bin
 #   pytorch_model_tp_1_pp_1.bin
 #   optimizer_tp_0_pp_0.bin
 #   ...
 ```
 ### Load Checkpoint
 ```python
 # Resume training
 accelerator.load_state('checkpoint-1000')
 # Automatically loads correct shard per rank
 ```
 ### Convert to Standard PyTorch
 ```bash
 # Merge Megatron checkpoint to single file
 python merge_megatron_checkpoint.py \
  --checkpoint-dir checkpoint-1000 \
  --output pytorch_model.bin
 ```
 ## Common Issues
 ### Issue: OOM with Pipeline Parallelism
 **Solution**: Increase micro-batches
 ```python
 megatron_plugin = MegatronLMPlugin(
    pp_degree=4,
    num_micro_batches=16,  # Increase from 4
 )
 ```
 ### Issue: Slow Training
 **Check 1**: Pipeline bubbles (PP too high)
 ```python
 # Reduce PP, increase TP
 tp_degree=4  # Increase
 pp_degree=2  # Decrease
 ```
 **Check 2**: Too few micro-batches (larger pipeline bubble)
 ```python
 num_micro_batches=8  # Increase
 ```
 ### Issue: NVLink Not Detected
 ```bash
 # Verify NVLink
 nvidia-smi nvlink -s
 # If no NVLink, avoid TP > 1
 # Use PP or DP instead
 ```
 ## Resources
 - Megatron-LM: https://github.com/NVIDIA/Megatron-LM
 - Accelerate Megatron docs: https://huggingface.co/docs/accelerate/usage_guides/megatron_lm
 - Paper: "Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism"
 - NVIDIA Apex: https://github.com/NVIDIA/apex
--- a/skills/mlops/accelerate/references/performance.md
+++ b/skills/mlops/accelerate/references/performance.md
@ -0,0 +1,525 @@
 # Accelerate Performance Tuning
 ## Profiling
 ### Basic Profiling
 ```python
 from accelerate import Accelerator
 import time
 import torch
 accelerator = Accelerator()
 # Warmup: walk the dataloader once. Calling next(iter(dataloader)) on every
 # step would rebuild the iterator (and its workers) and replay the first batch.
 for i, batch in enumerate(dataloader):
    if i >= 10:
        break
    outputs = model(**batch)
    loss = outputs.loss
    accelerator.backward(loss)
    optimizer.step()
    optimizer.zero_grad()
 # Profile training loop
 accelerator.wait_for_everyone()
 if torch.cuda.is_available():
    torch.cuda.synchronize()  # Don't time leftover warmup kernels
 start = time.time()
 max_batches = 100
 total_batches = 0
 total_samples = 0
 for batch in dataloader:
    if total_batches >= max_batches:
        break
    outputs = model(**batch)
    loss = outputs.loss
    accelerator.backward(loss)
    optimizer.step()
    optimizer.zero_grad()
    # Count what actually ran; the dataloader may hold fewer than max_batches.
    total_batches += 1
    # Assumes each value in the batch dict has the batch dimension first
    total_samples += len(next(iter(batch.values())))
 accelerator.wait_for_everyone()  # Sync all processes
 if torch.cuda.is_available():
    torch.cuda.synchronize()  # Wait for queued GPU work before stopping the timer
 elapsed = time.time() - start
 # Metrics
 batches_per_sec = total_batches / elapsed
 samples_per_sec = (total_samples * accelerator.num_processes) / elapsed
 print(f"Throughput: {samples_per_sec:.2f} samples/sec")
 print(f"Batches/sec: {batches_per_sec:.2f}")
 ```
 ### PyTorch Profiler Integration
 ```python
 from torch.profiler import profile, ProfilerActivity
 with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    record_shapes=True,
    profile_memory=True,
    with_stack=True
 ) as prof:
    for i, batch in enumerate(dataloader):
        if i >= 10:  # Profile first 10 batches
            break
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()
 # Print profiling results
 print(prof.key_averages().table(
    sort_by="cuda_time_total", row_limit=20
 ))
 # Export to Chrome tracing
 prof.export_chrome_trace("trace.json")
 # View at chrome://tracing
 ```
 ## Memory Optimization
 ### 1. Gradient Accumulation
 **Problem**: Large batch size causes OOM
 **Solution**: Accumulate gradients across micro-batches
 ```python
 accelerator = Accelerator(gradient_accumulation_steps=8)
 # Effective batch = batch_size × accumulation_steps × num_gpus
 # Example: 4 × 8 × 8 = 256
 for batch in dataloader:
    with accelerator.accumulate(model):  # Handles accumulation logic
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()
 ```
 **Memory savings**: 8× less activation memory (with 8 accumulation steps)
 ### 2. Gradient Checkpointing
 **Enable in model**:
 ```python
 from transformers import AutoModelForCausalLM
 model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    use_cache=False  # Required for gradient checkpointing
 )
 # Enable checkpointing
 model.gradient_checkpointing_enable()
 # Prepare with Accelerate
 model = accelerator.prepare(model)
 ```
 **Memory savings**: 30-50% with 10-15% slowdown
 ### 3. Mixed Precision
 **BF16 (A100/H100)**:
 ```python
 accelerator = Accelerator(mixed_precision='bf16')
 # Automatic mixed precision
 for batch in dataloader:
    outputs = model(**batch)  # Forward in BF16
    loss = outputs.loss
    accelerator.backward(loss)  # Backward in FP32
    optimizer.step()
 ```
 **FP16 (V100, older GPUs)**:
 ```python
 from accelerate.utils import GradScalerKwargs
 scaler_kwargs = GradScalerKwargs(
    init_scale=2.**16,
    growth_interval=2000
 )
 accelerator = Accelerator(
    mixed_precision='fp16',
    kwargs_handlers=[scaler_kwargs]
 )
 ```
 **Memory savings**: 50% compared to FP32
 ### 4. CPU Offloading (DeepSpeed)
 ```python
 from accelerate.utils import DeepSpeedPlugin
 ds_plugin = DeepSpeedPlugin(
    zero_stage=3,
    offload_optimizer_device="cpu",  # Offload optimizer to CPU
    offload_param_device="cpu",      # Offload parameters to CPU
 )
 accelerator = Accelerator(
    deepspeed_plugin=ds_plugin,
    mixed_precision='bf16'
 )
 ```
 **Memory savings**: 10-20× for optimizer state, 5-10× for parameters
 **Trade-off**: 20-30% slower due to CPU-GPU transfers
 ### 5. Flash Attention
 ```python
 # Install flash-attn
 # pip install flash-attn
 from transformers import AutoModelForCausalLM
 model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    attn_implementation="flash_attention_2"  # Enable Flash Attention 2
 )
 model = accelerator.prepare(model)
 ```
 **Memory savings**: 50% for attention, 2× faster
 **Requirements**: Ampere or newer GPU (e.g. A100/H100) and an FP16/BF16 model; no sequence-length alignment is required
 ## Communication Optimization
 ### 1. Gradient Bucketing (DDP)
 ```python
 from accelerate.utils import DistributedDataParallelKwargs
 ddp_kwargs = DistributedDataParallelKwargs(
    bucket_cap_mb=25,  # Bucket size for gradient reduction
    gradient_as_bucket_view=True,  # Reduce memory copies
    static_graph=False  # Set True if model doesn't change
 )
 accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
 ```
 **Recommended bucket sizes**:
 - Small models (<1B): 25 MB
 - Medium models (1-10B): 50-100 MB
 - Large models (>10B): 100-200 MB
 ### 2. Find Unused Parameters
 ```python
 # Only enable if model has unused parameters (slower!)
 ddp_kwargs = DistributedDataParallelKwargs(
    find_unused_parameters=True
 )
 ```
 **Use case**: Models with conditional branches (e.g., mixture of experts)
 **Cost**: 10-20% slower
 ### 3. NCCL Tuning
 ```bash
 # Set environment variables before launch
 export NCCL_DEBUG=INFO           # Debug info
 export NCCL_IB_DISABLE=0         # Enable InfiniBand
 export NCCL_SOCKET_IFNAME=eth0   # Network interface
 export NCCL_P2P_LEVEL=NVL        # Use NVLink
 accelerate launch train.py
 ```
 **NCCL_P2P_LEVEL options**:
 - `NVL`: NVLink (fastest, within node)
 - `PIX`: Same PCIe switch (fast, within node)
 - `PHB`: Same NUMA node via the PCIe host bridge; traffic goes through the CPU (slower, still within node)
 ## Data Loading Optimization
 ### 1. DataLoader Workers
 ```python
 from torch.utils.data import DataLoader
 train_loader = DataLoader(
    dataset,
    batch_size=32,
    num_workers=4,      # Parallel data loading
    pin_memory=True,    # Pin memory for faster GPU transfer
    prefetch_factor=2,  # Prefetch batches per worker
    persistent_workers=True  # Keep workers alive between epochs
 )
 train_loader = accelerator.prepare(train_loader)
 ```
 **Recommendations**:
 - `num_workers`: 2-4 per process; the setting applies to each GPU process separately (8 GPUs → 16-32 workers in total)
 - `pin_memory`: Always True for GPU training
 - `prefetch_factor`: 2-4 (higher for slow data loading)
 ### 2. Data Preprocessing
 ```python
 from datasets import load_dataset, load_from_disk
 # Bad: Preprocess during training (slow)
 dataset = load_dataset("openwebtext")
 for batch in dataset:
    tokens = tokenizer(batch['text'])  # Slow!
    ...
 # Good: Preprocess once, save
 dataset = load_dataset("openwebtext")
 tokenized = dataset.map(
    lambda x: tokenizer(x['text']),
    batched=True,
    num_proc=8,  # Parallel preprocessing
    remove_columns=['text']
 )
 tokenized.save_to_disk("preprocessed_data")
 # Load preprocessed (later runs skip tokenization entirely)
 dataset = load_from_disk("preprocessed_data")
 ```
 ### 3. Faster Tokenization
 ```python
 import os
 # Allow the Rust tokenizers backend to parallelize batch encoding across threads
 os.environ["TOKENIZERS_PARALLELISM"] = "true"
 from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained(
    "gpt2",
    use_fast=True  # Select the Rust-based "fast" tokenizer implementation
 )
 ```
 ## Compilation (PyTorch 2.0+)
 ### Compile Model
 ```python
 import torch
 # Compile model for faster execution
 model = torch.compile(
    model,
    mode="reduce-overhead",  # Options: default, reduce-overhead, max-autotune
    fullgraph=False,         # Compile entire graph (stricter)
    dynamic=True             # Support dynamic shapes
 )
 model = accelerator.prepare(model)
 ```
 **Speedup**: 10-50% depending on model
 **Compilation modes**:
 - `default`: Balanced (best for most cases)
 - `reduce-overhead`: Min overhead (best for small batches)
 - `max-autotune`: Max performance (slow compile, best for production)
 ### Compilation Best Practices
 ```python
 # Bad: Compile after prepare (won't work)
 model = accelerator.prepare(model)
 model = torch.compile(model)  # Error!
 # Good: Compile before prepare
 model = torch.compile(model)
 model = accelerator.prepare(model)
 # Training loop
 for batch in dataloader:
    # First iteration: slow (compilation)
    # Subsequent iterations: fast (compiled)
    outputs = model(**batch)
    ...
 ```
 ## Benchmarking Different Strategies
 ### Script Template
 ```python
 import time
 import torch
 from accelerate import Accelerator
 def benchmark_strategy(strategy_name, accelerator_kwargs, num_warmup=10, num_batches=100):
    """Benchmark a specific training strategy.

    Args:
        strategy_name: Label printed with the results.
        accelerator_kwargs: Keyword arguments forwarded to ``Accelerator``.
        num_warmup: Untimed training steps run before measuring.
        num_batches: Maximum number of timed training steps.
    """
    accelerator = Accelerator(**accelerator_kwargs)
    # Setup
    model = create_model()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    dataloader = create_dataloader()
    model, optimizer, dataloader = accelerator.prepare(
        model, optimizer, dataloader
    )
    def train_step(batch):
        """Run one forward/backward/optimizer step."""
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()
    # Warmup
    for i, batch in enumerate(dataloader):
        if i >= num_warmup:
            break
        train_step(batch)
    # Benchmark
    accelerator.wait_for_everyone()
    torch.cuda.synchronize()
    start = time.time()
    batches_done = 0
    samples_done = 0
    for batch in dataloader:
        if batches_done >= num_batches:
            break
        train_step(batch)
        # Count what actually ran; the dataloader may be shorter than num_batches.
        batches_done += 1
        # Assumes each value in the batch dict has the batch dimension first
        samples_done += len(next(iter(batch.values())))
    accelerator.wait_for_everyone()
    torch.cuda.synchronize()
    elapsed = time.time() - start
    # Metrics
    throughput = (samples_done * accelerator.num_processes) / elapsed
    memory_used = torch.cuda.max_memory_allocated() / 1e9  # GB
    if accelerator.is_main_process:
        print(f"\n{strategy_name}:")
        print(f"  Throughput: {throughput:.2f} samples/sec")
        print(f"  Memory: {memory_used:.2f} GB")
        print(f"  Time: {elapsed:.2f} sec")
    # Reset so the next strategy's peak is not inflated by this run
    torch.cuda.reset_peak_memory_stats()
 # Benchmark different strategies
 strategies = [
    ("DDP + FP32", {}),
    ("DDP + BF16", {"mixed_precision": "bf16"}),
    ("DDP + BF16 + GradAccum", {"mixed_precision": "bf16", "gradient_accumulation_steps": 4}),
    ("FSDP", {"fsdp_plugin": fsdp_plugin}),
    ("DeepSpeed ZeRO-2", {"deepspeed_plugin": ds_plugin_stage2}),
    ("DeepSpeed ZeRO-3", {"deepspeed_plugin": ds_plugin_stage3}),
 ]
 for name, kwargs in strategies:
    benchmark_strategy(name, kwargs)
 ```
 ## Performance Checklist
 **Before training**:
 - [ ] Use BF16/FP16 mixed precision
 - [ ] Enable gradient checkpointing (if OOM)
 - [ ] Set appropriate `num_workers` (2-4 per GPU)
 - [ ] Enable `pin_memory=True`
 - [ ] Preprocess data once, not during training
 - [ ] Compile model with `torch.compile` (PyTorch 2.0+)
 **For large models**:
 - [ ] Use FSDP or DeepSpeed ZeRO-3
 - [ ] Enable CPU offloading (if still OOM)
 - [ ] Use Flash Attention
 - [ ] Increase gradient accumulation
 **For multi-node**:
 - [ ] Check network topology (InfiniBand > Ethernet)
 - [ ] Tune NCCL settings
 - [ ] Use larger bucket sizes for DDP
 - [ ] Verify NVLink for tensor parallelism
 **Profiling**:
 - [ ] Profile first 10-100 batches
 - [ ] Check GPU utilization (`nvidia-smi dmon`)
 - [ ] Check data loading time (should be <5% of iteration)
 - [ ] Identify communication bottlenecks
 ## Common Performance Issues
 ### Issue: Low GPU Utilization (<80%)
 **Cause 1**: Data loading bottleneck
 ```python
 # Solution: Increase workers and prefetch
 num_workers=8
 prefetch_factor=4
 ```
 **Cause 2**: Small batch size
 ```python
 # Solution: Increase batch size or use gradient accumulation
 batch_size=32  # Increase
 gradient_accumulation_steps=4  # Or accumulate
 ```
 ### Issue: High Memory Usage
 **Solution 1**: Gradient checkpointing
 ```python
 model.gradient_checkpointing_enable()
 ```
 **Solution 2**: Reduce batch size, increase accumulation
 ```python
 batch_size=8  # Reduce from 32
 gradient_accumulation_steps=16  # Maintain effective batch
 ```
 **Solution 3**: Use FSDP or DeepSpeed ZeRO-3
 ```python
 accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
 ```
 ### Issue: Slow Multi-GPU Training
 **Cause**: Communication bottleneck
 **Check 1**: Gradient bucket size
 ```python
 ddp_kwargs = DistributedDataParallelKwargs(bucket_cap_mb=100)
 ```
 **Check 2**: NCCL settings
 ```bash
 export NCCL_DEBUG=INFO
 # Check for "Using NVLS" (good) vs "Using PHB" (bad)
 ```
 **Check 3**: Network bandwidth
 ```bash
 # Test inter-GPU bandwidth
 nvidia-smi nvlink -s
 ```
 ## Resources
 - Accelerate Performance: https://huggingface.co/docs/accelerate/usage_guides/performance
 - PyTorch Profiler: https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html
 - NCCL Tuning: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
 - Flash Attention: https://github.com/Dao-AILab/flash-attention
--- a/skills/mlops/audiocraft/SKILL.md
+++ b/skills/mlops/audiocraft/SKILL.md
@ -0,0 +1,564 @@
 ---
 name: audiocraft-audio-generation
 description: PyTorch library for audio generation including text-to-music (MusicGen) and text-to-sound (AudioGen). Use when you need to generate music from text descriptions, create sound effects, or perform melody-conditioned music generation.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Multimodal, Audio Generation, Text-to-Music, Text-to-Audio, MusicGen]
 dependencies: [audiocraft, torch>=2.0.0, transformers>=4.30.0]
 ---
 # AudioCraft: Audio Generation
 Comprehensive guide to using Meta's AudioCraft for text-to-music and text-to-audio generation with MusicGen, AudioGen, and EnCodec.
 ## When to use AudioCraft
 **Use AudioCraft when:**
 - Need to generate music from text descriptions
 - Creating sound effects and environmental audio
 - Building music generation applications
 - Need melody-conditioned music generation
 - Want stereo audio output
 - Require controllable music generation with style transfer
 **Key features:**
 - **MusicGen**: Text-to-music generation with melody conditioning
 - **AudioGen**: Text-to-sound effects generation
 - **EnCodec**: High-fidelity neural audio codec
 - **Multiple model sizes**: Small (300M) to Large (3.3B)
 - **Stereo support**: Full stereo audio generation
 - **Style conditioning**: MusicGen-Style for reference-based generation
 **Use alternatives instead:**
 - **Stable Audio**: For longer commercial music generation
 - **Bark**: For text-to-speech with music/sound effects
 - **Riffusion**: For spectrogram-based music generation
 - **OpenAI Jukebox**: For raw audio generation with lyrics
 ## Quick start
 ### Installation
 ```bash
 # From PyPI
 pip install audiocraft
 # From GitHub (latest)
 pip install git+https://github.com/facebookresearch/audiocraft.git
 # Or use HuggingFace Transformers
 pip install transformers torch torchaudio
 ```
 ### Basic text-to-music (AudioCraft)
 ```python
 import torchaudio
 from audiocraft.models import MusicGen
 # Load model
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Set generation parameters
 model.set_generation_params(
    duration=8,  # seconds
    top_k=250,
    temperature=1.0
 )
 # Generate from text
 descriptions = ["happy upbeat electronic dance music with synths"]
 wav = model.generate(descriptions)
 # Save audio
 torchaudio.save("output.wav", wav[0].cpu(), sample_rate=32000)
 ```
 ### Using HuggingFace Transformers
 ```python
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 import scipy
 # Load model and processor
 processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
 model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
 model.to("cuda")
 # Generate music
 inputs = processor(
    text=["80s pop track with bassy drums and synth"],
    padding=True,
    return_tensors="pt"
 ).to("cuda")
 audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=256
 )
 # Save
 sampling_rate = model.config.audio_encoder.sampling_rate
 scipy.io.wavfile.write("output.wav", rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())
 ```
 ### Text-to-sound with AudioGen
 ```python
 import torchaudio
 from audiocraft.models import AudioGen
 # Load AudioGen
 model = AudioGen.get_pretrained('facebook/audiogen-medium')
 model.set_generation_params(duration=5)
 # Generate sound effects
 descriptions = ["dog barking in a park with birds chirping"]
 wav = model.generate(descriptions)
 # Save the first (and only) item of the batch
 torchaudio.save("sound.wav", wav[0].cpu(), sample_rate=16000)
 ```
 ## Core concepts
 ### Architecture overview
 ```
 AudioCraft Architecture:
 ┌──────────────────────────────────────────────────────────────┐
 │                    Text Encoder (T5)                          │
 │                         │                                     │
 │                    Text Embeddings                            │
 └────────────────────────┬─────────────────────────────────────┘
                         │
 ┌────────────────────────▼─────────────────────────────────────┐
 │              Transformer Decoder (LM)                         │
 │     Auto-regressively generates audio tokens                  │
 │     Using efficient token interleaving patterns               │
 └────────────────────────┬─────────────────────────────────────┘
                         │
 ┌────────────────────────▼─────────────────────────────────────┐
 │                EnCodec Audio Decoder                          │
 │        Converts tokens back to audio waveform                 │
 └──────────────────────────────────────────────────────────────┘
 ```
 ### Model variants
 | Model | Size | Description | Use Case |
 |-------|------|-------------|----------|
 | `musicgen-small` | 300M | Text-to-music | Quick generation |
 | `musicgen-medium` | 1.5B | Text-to-music | Balanced |
 | `musicgen-large` | 3.3B | Text-to-music | Best quality |
 | `musicgen-melody` | 1.5B | Text + melody | Melody conditioning |
 | `musicgen-melody-large` | 3.3B | Text + melody | Best melody |
 | `musicgen-stereo-*` | Varies | Stereo output | Stereo generation |
 | `musicgen-style` | 1.5B | Style transfer | Reference-based |
 | `audiogen-medium` | 1.5B | Text-to-sound | Sound effects |
 ### Generation parameters
 | Parameter | Default | Description |
 |-----------|---------|-------------|
 | `duration` | 8.0 | Length in seconds (1-120) |
 | `top_k` | 250 | Top-k sampling |
 | `top_p` | 0.0 | Nucleus sampling (0 = disabled) |
 | `temperature` | 1.0 | Sampling temperature |
 | `cfg_coef` | 3.0 | Classifier-free guidance |
 ## MusicGen usage
 ### Text-to-music generation
 ```python
 from audiocraft.models import MusicGen
 import torchaudio
 model = MusicGen.get_pretrained('facebook/musicgen-medium')
 # Configure generation
 model.set_generation_params(
    duration=30,          # Up to 30 seconds
    top_k=250,            # Sampling diversity
    top_p=0.0,            # 0 = use top_k only
    temperature=1.0,      # Creativity (higher = more varied)
    cfg_coef=3.0          # Text adherence (higher = stricter)
 )
 # Generate multiple samples
 descriptions = [
    "epic orchestral soundtrack with strings and brass",
    "chill lo-fi hip hop beat with jazzy piano",
    "energetic rock song with electric guitar"
 ]
 # Generate (returns [batch, channels, samples])
 wav = model.generate(descriptions)
 # Save each
 for i, audio in enumerate(wav):
    torchaudio.save(f"music_{i}.wav", audio.cpu(), sample_rate=32000)
 ```
 ### Melody-conditioned generation
 ```python
 from audiocraft.models import MusicGen
 import torchaudio
 # Load melody model
 model = MusicGen.get_pretrained('facebook/musicgen-melody')
 model.set_generation_params(duration=30)
 # Load melody audio
 melody, sr = torchaudio.load("melody.wav")
 # Generate with melody conditioning
 descriptions = ["acoustic guitar folk song"]
 wav = model.generate_with_chroma(descriptions, melody, sr)
 torchaudio.save("melody_conditioned.wav", wav[0].cpu(), sample_rate=32000)
 ```
 ### Stereo generation
 ```python
 import torchaudio
 from audiocraft.models import MusicGen
 # Load stereo model
 model = MusicGen.get_pretrained('facebook/musicgen-stereo-medium')
 model.set_generation_params(duration=15)
 descriptions = ["ambient electronic music with wide stereo panning"]
 wav = model.generate(descriptions)
 # wav shape: [batch, 2, samples] for stereo
 print(f"Stereo shape: {wav.shape}")  # [1, 2, 480000]
 torchaudio.save("stereo.wav", wav[0].cpu(), sample_rate=32000)
 ```
 ### Audio continuation
 ```python
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 processor = AutoProcessor.from_pretrained("facebook/musicgen-medium")
 model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-medium")
 # Load audio to continue
 import torchaudio
 audio, sr = torchaudio.load("intro.wav")
 # Process with text and audio
 inputs = processor(
    audio=audio.squeeze().numpy(),
    sampling_rate=sr,
    text=["continue with a epic chorus"],
    padding=True,
    return_tensors="pt"
 )
 # Generate continuation
 audio_values = model.generate(**inputs, do_sample=True, guidance_scale=3, max_new_tokens=512)
 ```
 ## MusicGen-Style usage
 ### Style-conditioned generation
 ```python
 import torchaudio
 from audiocraft.models import MusicGen
 # Load style model
 model = MusicGen.get_pretrained('facebook/musicgen-style')
 # Configure generation with style
 model.set_generation_params(
    duration=30,
    cfg_coef=3.0,
    cfg_coef_beta=5.0  # Style influence
 )
 # Configure style conditioner
 model.set_style_conditioner_params(
    eval_q=3,          # RVQ quantizers (1-6)
    excerpt_length=3.0  # Style excerpt length
 )
 # Load style reference
 style_audio, sr = torchaudio.load("reference_style.wav")
 # Generate with text + style
 descriptions = ["upbeat dance track"]
 wav = model.generate_with_style(descriptions, style_audio, sr)
 ```
 ### Style-only generation (no text)
 ```python
 # Generate matching style without text prompt
 model.set_generation_params(
    duration=30,
    cfg_coef=3.0,
    cfg_coef_beta=None  # Disable double CFG for style-only
 )
 wav = model.generate_with_style([None], style_audio, sr)
 ```
 ## AudioGen usage
 ### Sound effect generation
 ```python
 from audiocraft.models import AudioGen
 import torchaudio
 model = AudioGen.get_pretrained('facebook/audiogen-medium')
 model.set_generation_params(duration=10)
 # Generate various sounds
 descriptions = [
    "thunderstorm with heavy rain and lightning",
    "busy city traffic with car horns",
    "ocean waves crashing on rocks",
    "crackling campfire in forest"
 ]
 wav = model.generate(descriptions)
 for i, audio in enumerate(wav):
    torchaudio.save(f"sound_{i}.wav", audio.cpu(), sample_rate=16000)
 ```
 ## EnCodec usage
 ### Audio compression
 ```python
 from audiocraft.models import CompressionModel
 import torch
 import torchaudio
 # Load EnCodec
 model = CompressionModel.get_pretrained('facebook/encodec_32khz')
 # Load audio
 wav, sr = torchaudio.load("audio.wav")
 # Ensure correct sample rate
 if sr != 32000:
    resampler = torchaudio.transforms.Resample(sr, 32000)
    wav = resampler(wav)
 # Encode to tokens
 with torch.no_grad():
    encoded = model.encode(wav.unsqueeze(0))
    codes = encoded[0]  # Audio codes
 # Decode back to audio
 with torch.no_grad():
    decoded = model.decode(codes)
 torchaudio.save("reconstructed.wav", decoded[0].cpu(), sample_rate=32000)
 ```
 ## Common workflows
 ### Workflow 1: Music generation pipeline
 ```python
 import torch
 import torchaudio
 from audiocraft.models import MusicGen
 class MusicGenerator:
    def __init__(self, model_name="facebook/musicgen-medium"):
        self.model = MusicGen.get_pretrained(model_name)
        self.sample_rate = 32000
    def generate(self, prompt, duration=30, temperature=1.0, cfg=3.0):
        self.model.set_generation_params(
            duration=duration,
            top_k=250,
            temperature=temperature,
            cfg_coef=cfg
        )
        with torch.no_grad():
            wav = self.model.generate([prompt])
        return wav[0].cpu()
    def generate_batch(self, prompts, duration=30):
        self.model.set_generation_params(duration=duration)
        with torch.no_grad():
            wav = self.model.generate(prompts)
        return wav.cpu()
    def save(self, audio, path):
        torchaudio.save(path, audio, sample_rate=self.sample_rate)
 # Usage
 generator = MusicGenerator()
 audio = generator.generate(
    "epic cinematic orchestral music",
    duration=30,
    temperature=1.0
 )
 generator.save(audio, "epic_music.wav")
 ```
 ### Workflow 2: Sound design batch processing
 ```python
 import json
 from pathlib import Path
 from audiocraft.models import AudioGen
 import torchaudio
 def batch_generate_sounds(sound_specs, output_dir):
    """
    Generate one WAV file per sound specification.
    Args:
        sound_specs: list of {"name": str, "description": str, "duration": float};
            "duration" is optional and defaults to 5 seconds
        output_dir: output directory path (created, including parents, if missing)
    Returns:
        list of {"name": str, "path": str, "description": str}, one per spec,
        in the same order as sound_specs
    """
    model = AudioGen.get_pretrained('facebook/audiogen-medium')
    output_dir = Path(output_dir)
    # parents=True so nested targets such as "assets/sfx/" work on a fresh checkout
    output_dir.mkdir(parents=True, exist_ok=True)
    results = []
    for spec in sound_specs:
        # Duration is a per-call model setting, so it is reset for every spec
        model.set_generation_params(duration=spec.get("duration", 5))
        wav = model.generate([spec["description"]])
        output_path = output_dir / f"{spec['name']}.wav"
        torchaudio.save(str(output_path), wav[0].cpu(), sample_rate=16000)
        results.append({
            "name": spec["name"],
            "path": str(output_path),
            "description": spec["description"]
        })
    return results
 # Usage
 sounds = [
    {"name": "explosion", "description": "massive explosion with debris", "duration": 3},
    {"name": "footsteps", "description": "footsteps on wooden floor", "duration": 5},
    {"name": "door", "description": "wooden door creaking and closing", "duration": 2}
 ]
 results = batch_generate_sounds(sounds, "sound_effects/")
 ```
 ### Workflow 3: Gradio demo
 ```python
 import gradio as gr
 import torch
 import torchaudio
 from audiocraft.models import MusicGen
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 def generate_music(prompt, duration, temperature, cfg_coef):
    """Generate one clip for the Gradio UI and return the path of the saved WAV file.
    Args:
        prompt: text description of the music
        duration: clip length in seconds
        temperature: sampling temperature
        cfg_coef: classifier-free guidance coefficient
    Returns:
        Path of a newly created temporary ``.wav`` file.
    """
    import tempfile
    model.set_generation_params(
        duration=duration,
        temperature=temperature,
        cfg_coef=cfg_coef
    )
    with torch.no_grad():
        wav = model.generate([prompt])
    # Use a unique temp file per request: a fixed filename would let
    # concurrent requests overwrite each other's audio.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        path = tmp.name
    # The handle is closed before saving so the write also works on Windows
    torchaudio.save(path, wav[0].cpu(), sample_rate=32000)
    return path
 demo = gr.Interface(
    fn=generate_music,
    inputs=[
        gr.Textbox(label="Music Description", placeholder="upbeat electronic dance music"),
        gr.Slider(1, 30, value=8, label="Duration (seconds)"),
        gr.Slider(0.5, 2.0, value=1.0, label="Temperature"),
        gr.Slider(1.0, 10.0, value=3.0, label="CFG Coefficient")
    ],
    outputs=gr.Audio(label="Generated Music"),
    title="MusicGen Demo"
 )
 demo.launch()
 ```
 ## Performance optimization
 ### Memory optimization
 ```python
 # Use smaller model
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Clear cache between generations
 torch.cuda.empty_cache()
 # Generate shorter durations
 model.set_generation_params(duration=10)  # Instead of 30
 # Use half precision
 model = model.half()
 ```
 ### Batch processing efficiency
 ```python
 # Process multiple prompts at once (more efficient)
 descriptions = ["prompt1", "prompt2", "prompt3", "prompt4"]
 wav = model.generate(descriptions)  # Single batch
 # Instead of
 for desc in descriptions:
    wav = model.generate([desc])  # Multiple batches (slower)
 ```
 ### GPU memory requirements
 | Model | FP32 VRAM | FP16 VRAM |
 |-------|-----------|-----------|
 | musicgen-small | ~4GB | ~2GB |
 | musicgen-medium | ~8GB | ~4GB |
 | musicgen-large | ~16GB | ~8GB |
 ## Common issues
 | Issue | Solution |
 |-------|----------|
 | CUDA OOM | Use smaller model, reduce duration |
 | Poor quality | Increase cfg_coef, better prompts |
 | Generation too short | Check max duration setting |
 | Audio artifacts | Try different temperature |
 | Stereo not working | Use stereo model variant |
 ## References
 - **[Advanced Usage](references/advanced-usage.md)** - Training, fine-tuning, deployment
 - **[Troubleshooting](references/troubleshooting.md)** - Common issues and solutions
 ## Resources
 - **GitHub**: https://github.com/facebookresearch/audiocraft
 - **Paper (MusicGen)**: https://arxiv.org/abs/2306.05284
 - **Paper (AudioGen)**: https://arxiv.org/abs/2209.15352
 - **HuggingFace**: https://huggingface.co/facebook/musicgen-small
 - **Demo**: https://huggingface.co/spaces/facebook/MusicGen
--- a/skills/mlops/audiocraft/references/advanced-usage.md
+++ b/skills/mlops/audiocraft/references/advanced-usage.md
@ -0,0 +1,666 @@
 # AudioCraft Advanced Usage Guide
 ## Fine-tuning MusicGen
 ### Custom dataset preparation
 ```python
 import os
 import json
 from pathlib import Path
 import torchaudio
 def prepare_dataset(audio_dir, output_dir, metadata_file):
    """
    Prepare dataset for MusicGen fine-tuning.
    Every clip is resampled to 32 kHz, down-mixed to mono and renumbered.
    Args:
        audio_dir: directory that the "path" entries in the metadata are relative to
        output_dir: destination directory (created if missing)
        metadata_file: JSON file holding a list of
            {"path": "...", "description": "..."} objects
    Returns:
        list of {"path": str, "description": str, "duration": float}, where
        "path" is relative to output_dir and "duration" is in seconds
    Directory structure:
    output_dir/
    ├── audio/
    │   ├── 0001.wav
    │   ├── 0002.wav
    │   └── ...
    └── metadata.json
    """
    output_dir = Path(output_dir)
    audio_output = output_dir / "audio"
    audio_output.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: descriptions may contain non-ASCII text and the
    # platform default encoding is not UTF-8 everywhere
    with open(metadata_file, encoding="utf-8") as f:
        metadata = json.load(f)
    processed = []
    for idx, item in enumerate(metadata):
        audio_path = Path(audio_dir) / item["path"]
        # Load and resample to 32kHz
        wav, sr = torchaudio.load(str(audio_path))
        if sr != 32000:
            resampler = torchaudio.transforms.Resample(sr, 32000)
            wav = resampler(wav)
        # Convert to mono if stereo
        if wav.shape[0] > 1:
            wav = wav.mean(dim=0, keepdim=True)
        # Save processed audio under a zero-padded sequential name
        output_path = audio_output / f"{idx:04d}.wav"
        torchaudio.save(str(output_path), wav, sample_rate=32000)
        processed.append({
            "path": str(output_path.relative_to(output_dir)),
            "description": item["description"],
            "duration": wav.shape[1] / 32000
        })
    # Save processed metadata
    with open(output_dir / "metadata.json", "w", encoding="utf-8") as f:
        json.dump(processed, f, indent=2)
    print(f"Processed {len(processed)} samples")
    return processed
 ```
 ### Fine-tuning with dora
 ```bash
 # AudioCraft uses dora for experiment management
 # Install dora
 pip install dora-search
 # Clone AudioCraft
 git clone https://github.com/facebookresearch/audiocraft.git
 cd audiocraft
 # Create config for fine-tuning
 cat > config/solver/musicgen/finetune.yaml << 'EOF'
 defaults:
  - musicgen/musicgen_base
  - /model: lm/musicgen_lm
  - /conditioner: cond_base
 solver: musicgen
 autocast: true
 autocast_dtype: float16
 optim:
  epochs: 100
  batch_size: 4
  lr: 1e-4
  ema: 0.999
  optimizer: adamw
 dataset:
  batch_size: 4
  num_workers: 4
  train:
    - dset: your_dataset
      root: /path/to/dataset
  valid:
    - dset: your_dataset
      root: /path/to/dataset
 checkpoint:
  save_every: 10
  keep_every_states: null
 EOF
 # Run fine-tuning
 dora run solver=musicgen/finetune
 ```
 ### LoRA fine-tuning
 ```python
 from peft import LoraConfig, get_peft_model
 from audiocraft.models import MusicGen
 import torch
 # Load base model
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Get the language model component
 lm = model.lm
 # Configure LoRA
 lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj", "k_proj", "out_proj"],
    lora_dropout=0.05,
    bias="none"
 )
 # Apply LoRA
 lm = get_peft_model(lm, lora_config)
 lm.print_trainable_parameters()
 ```
 ## Multi-GPU Training
 ### DataParallel
 ```python
 import torch
 import torch.nn as nn
 from audiocraft.models import MusicGen
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Wrap LM with DataParallel
 if torch.cuda.device_count() > 1:
    model.lm = nn.DataParallel(model.lm)
 model.to("cuda")
 ```
 ### DistributedDataParallel
 ```python
 import torch
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 from audiocraft.models import MusicGen
 def setup(rank, world_size):
    """Join the NCCL process group and make GPU ``rank`` the current CUDA device."""
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)
 def train(rank, world_size):
    """Per-process entry point: wrap the MusicGen LM in DDP on GPU ``rank``."""
    setup(rank, world_size)
    model = MusicGen.get_pretrained('facebook/musicgen-small')
    model.lm = model.lm.to(rank)
    model.lm = DDP(model.lm, device_ids=[rank])
    # Training loop
    # ...
    dist.destroy_process_group()
 ```
 ## Custom Conditioning
 ### Adding new conditioners
 ```python
 from audiocraft.modules.conditioners import BaseConditioner
 import torch
 class CustomConditioner(BaseConditioner):
    """Custom conditioner for additional control signals."""
    def __init__(self, dim, output_dim):
        super().__init__(dim, output_dim)
        self.embed = torch.nn.Linear(dim, output_dim)
    def forward(self, x):
        return self.embed(x)
    def tokenize(self, x):
        # Tokenize input for conditioning
        return x
 # Use with MusicGen
 from audiocraft.models.builders import get_lm_model
 # Modify model config to include custom conditioner
 # This requires editing the model configuration
 ```
 ### Melody conditioning internals
 ```python
 from audiocraft.models import MusicGen
 from audiocraft.modules.codebooks_patterns import DelayedPatternProvider
 import torch
 model = MusicGen.get_pretrained('facebook/musicgen-melody')
 # Access chroma extractor
 chroma_extractor = model.lm.condition_provider.conditioners.get('chroma')
 # Manual chroma extraction
 def extract_chroma(audio, sr):
    """Extract chroma features from audio."""
    import librosa
    # Compute chroma
    chroma = librosa.feature.chroma_cqt(y=audio.numpy(), sr=sr)
    return torch.from_numpy(chroma).float()
 # Use extracted chroma for conditioning
 chroma = extract_chroma(melody_audio, sample_rate)
 ```
 ## EnCodec Deep Dive
 ### Custom compression settings
 ```python
 from audiocraft.models import CompressionModel
 import torch
 # Load EnCodec
 encodec = CompressionModel.get_pretrained('facebook/encodec_32khz')
 # Access codec parameters
 print(f"Sample rate: {encodec.sample_rate}")
 print(f"Channels: {encodec.channels}")
 print(f"Cardinality: {encodec.cardinality}")  # Codebook size
 print(f"Num codebooks: {encodec.num_codebooks}")
 print(f"Frame rate: {encodec.frame_rate}")
 # Encode with specific bandwidth
 # Lower bandwidth = more compression, lower quality
 encodec.set_target_bandwidth(6.0)  # 6 kbps
 audio = torch.randn(1, 1, 32000)  # 1 second
 encoded = encodec.encode(audio)
 decoded = encodec.decode(encoded[0])
 ```
 ### Streaming encoding
 ```python
 import torch
 from audiocraft.models import CompressionModel
 encodec = CompressionModel.get_pretrained('facebook/encodec_32khz')
 def encode_streaming(audio_stream, chunk_size=32000):
    """Encode audio in streaming fashion."""
    all_codes = []
    for chunk in audio_stream:
        # Ensure chunk is right shape
        if chunk.dim() == 1:
            chunk = chunk.unsqueeze(0).unsqueeze(0)
        with torch.no_grad():
            codes = encodec.encode(chunk)[0]
            all_codes.append(codes)
    return torch.cat(all_codes, dim=-1)
 def decode_streaming(codes_stream, output_stream):
    """Decode codes in streaming fashion."""
    for codes in codes_stream:
        with torch.no_grad():
            audio = encodec.decode(codes)
            output_stream.write(audio.cpu().numpy())
 ```
 ## MultiBand Diffusion
 ### Using MBD for enhanced quality
 ```python
 import torch
 from audiocraft.models import MusicGen, MultiBandDiffusion
 # Load MusicGen
 model = MusicGen.get_pretrained('facebook/musicgen-medium')
 # Load MultiBand Diffusion
 mbd = MultiBandDiffusion.get_mbd_musicgen()
 model.set_generation_params(duration=10)
 descriptions = ["epic orchestral music"]
 with torch.no_grad():
    # Generate once and keep the discrete tokens next to the standard decode,
    # so both decoders are compared on exactly the same token sequence
    wav_standard, gen_tokens = model.generate(descriptions, return_tokens=True)
    # Decode the same tokens with MBD
    wav_mbd = mbd.tokens_to_wav(gen_tokens)
 # Compare quality
 print(f"Standard shape: {wav_standard.shape}")
 print(f"MBD shape: {wav_mbd.shape}")
 ```
 ## API Server Deployment
 ### FastAPI server
 ```python
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 import torch
 import torchaudio
 from audiocraft.models import MusicGen
 import io
 import base64
 app = FastAPI()
 # Load model at startup
 model = None
@app.on_event("startup")
 async def load_model():
    global model
    model = MusicGen.get_pretrained('facebook/musicgen-small')
    model.set_generation_params(duration=10)
 class GenerateRequest(BaseModel):
    prompt: str
    duration: float = 10.0
    temperature: float = 1.0
    cfg_coef: float = 3.0
 class GenerateResponse(BaseModel):
    audio_base64: str
    sample_rate: int
    duration: float
@app.post("/generate", response_model=GenerateResponse)
 async def generate(request: GenerateRequest):
    if model is None:
        raise HTTPException(status_code=500, detail="Model not loaded")
    try:
        model.set_generation_params(
            duration=min(request.duration, 30),
            temperature=request.temperature,
            cfg_coef=request.cfg_coef
        )
        with torch.no_grad():
            wav = model.generate([request.prompt])
        # Convert to bytes
        buffer = io.BytesIO()
        torchaudio.save(buffer, wav[0].cpu(), sample_rate=32000, format="wav")
        buffer.seek(0)
        audio_base64 = base64.b64encode(buffer.read()).decode()
        return GenerateResponse(
            audio_base64=audio_base64,
            sample_rate=32000,
            duration=wav.shape[-1] / 32000
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
 async def health():
    return {"status": "ok", "model_loaded": model is not None}
 # Run: uvicorn server:app --host 0.0.0.0 --port 8000
 ```
 ### Batch processing service
 ```python
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 import torch
 from audiocraft.models import MusicGen
 class MusicGenService:
    """Async facade over a single shared MusicGen model.
    Generation runs in a thread pool so the event loop stays responsive.
    Access to the model itself is serialised, because generation parameters
    are set on the shared model object before each ``generate`` call.
    """
    def __init__(self, model_name='facebook/musicgen-small', max_workers=2):
        import threading
        self.model = MusicGen.get_pretrained(model_name)
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        # Kept for backward compatibility with code that references ``service.lock``
        self.lock = asyncio.Lock()
        # Held inside worker threads: without it two workers could interleave
        # set_generation_params() and generate() and produce clips with the
        # wrong duration.
        self._model_lock = threading.Lock()
    async def generate_async(self, prompt, duration=10):
        """Generate one clip for ``prompt`` and return it as a CPU tensor."""
        loop = asyncio.get_running_loop()
        def _generate():
            # Parameters and generation form one critical section on the shared model
            with self._model_lock, torch.no_grad():
                self.model.set_generation_params(duration=duration)
                return self.model.generate([prompt])
        # Run in thread pool
        wav = await loop.run_in_executor(self.executor, _generate)
        return wav[0].cpu()
    async def generate_batch_async(self, prompts, duration=10):
        """Schedule every prompt and return the results in prompt order."""
        tasks = [self.generate_async(p, duration) for p in prompts]
        return await asyncio.gather(*tasks)
 # Usage
 service = MusicGenService()
 async def main():
    prompts = ["jazz piano", "rock guitar", "electronic beats"]
    results = await service.generate_batch_async(prompts)
    return results
 ```
 ## Integration Patterns
 ### LangChain tool
 ```python
 from langchain.tools import BaseTool
 import torch
 import torchaudio
 from audiocraft.models import MusicGen
 import tempfile
 class MusicGeneratorTool(BaseTool):
    name = "music_generator"
    description = "Generate music from a text description. Input should be a detailed description of the music style, mood, and instruments."
    def __init__(self):
        super().__init__()
        self.model = MusicGen.get_pretrained('facebook/musicgen-small')
        self.model.set_generation_params(duration=15)
    def _run(self, description: str) -> str:
        with torch.no_grad():
            wav = self.model.generate([description])
        # Save to temp file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            torchaudio.save(f.name, wav[0].cpu(), sample_rate=32000)
            return f"Generated music saved to: {f.name}"
    async def _arun(self, description: str) -> str:
        return self._run(description)
 ```
 ### Gradio with advanced controls
 ```python
 import gradio as gr
 import torch
 import torchaudio
 from audiocraft.models import MusicGen
 models = {}
 def load_model(model_size):
    if model_size not in models:
        model_name = f"facebook/musicgen-{model_size}"
        models[model_size] = MusicGen.get_pretrained(model_name)
    return models[model_size]
 def generate(prompt, duration, temperature, cfg_coef, top_k, model_size):
    """Generate one clip with the selected model size and return the saved WAV path.
    Args:
        prompt: text description of the music
        duration: clip length in seconds
        temperature: sampling temperature
        cfg_coef: classifier-free guidance coefficient
        top_k: top-k sampling cutoff
        model_size: suffix of the checkpoint name, e.g. "small"
    Returns:
        Path of a newly created temporary ``.wav`` file.
    """
    import tempfile
    model = load_model(model_size)
    model.set_generation_params(
        duration=duration,
        temperature=temperature,
        cfg_coef=cfg_coef,
        top_k=top_k
    )
    with torch.no_grad():
        wav = model.generate([prompt])
    # Use a unique temp file per request: with share=True several users can hit
    # the demo at once, and a fixed filename would be overwritten.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        path = tmp.name
    # The handle is closed before saving so the write also works on Windows
    torchaudio.save(path, wav[0].cpu(), sample_rate=32000)
    return path
 demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt", lines=3),
        gr.Slider(1, 30, value=10, label="Duration (s)"),
        gr.Slider(0.1, 2.0, value=1.0, label="Temperature"),
        gr.Slider(0.5, 10.0, value=3.0, label="CFG Coefficient"),
        gr.Slider(50, 500, value=250, step=50, label="Top-K"),
        gr.Dropdown(["small", "medium", "large"], value="small", label="Model Size")
    ],
    outputs=gr.Audio(label="Generated Music"),
    title="MusicGen Advanced",
    allow_flagging="never"
 )
 demo.launch(share=True)
 ```
 ## Audio Processing Pipeline
 ### Post-processing chain
 ```python
 import torch
 import torchaudio
 import torchaudio.transforms as T
 import numpy as np
 class AudioPostProcessor:
    """Post-processing helpers for generated audio tensors.
    Every method treats the last dimension as time (e.g. ``[channels, samples]``)
    and returns a new tensor; the input is never modified in place.
    """
    def __init__(self, sample_rate=32000):
        self.sample_rate = sample_rate
    def normalize(self, audio, target_db=-14.0):
        """Scale ``audio`` so its overall RMS equals ``target_db`` dB (0 dB = RMS of 1.0)."""
        rms = torch.sqrt(torch.mean(audio ** 2))
        target_rms = 10 ** (target_db / 20)
        # The epsilon keeps silent input from dividing by zero
        gain = target_rms / (rms + 1e-8)
        return audio * gain
    def fade_in_out(self, audio, fade_duration=0.1):
        """Apply a linear fade-in and fade-out of ``fade_duration`` seconds each.
        The fade is clamped to the clip length, and a zero-length fade returns
        the input unchanged.
        """
        total_samples = audio.shape[-1]
        fade_samples = min(int(fade_duration * self.sample_rate), total_samples)
        if fade_samples <= 0:
            # Nothing to fade; note that ``audio[..., -0:]`` would select the whole clip
            return audio
        # Create fade curves on the same device as the audio
        fade_in = torch.linspace(0, 1, fade_samples, device=audio.device)
        fade_out = torch.linspace(1, 0, fade_samples, device=audio.device)
        # Work on a copy so the caller's tensor is left untouched
        faded = audio.clone()
        faded[..., :fade_samples] *= fade_in
        faded[..., -fade_samples:] *= fade_out
        return faded
    def apply_reverb(self, audio, decay=0.5):
        """Add two decaying echoes (at 0.1 s and 0.2 s) after the dry signal.
        The output has the same shape as the input; each channel is processed
        independently.
        """
        impulse_len = int(self.sample_rate * 0.5)
        impulse = torch.zeros(impulse_len, dtype=audio.dtype, device=audio.device)
        impulse[0] = 1.0
        impulse[int(self.sample_rate * 0.1)] = decay * 0.5
        impulse[int(self.sample_rate * 0.2)] = decay * 0.25
        # conv1d computes cross-correlation, so flip the impulse to get a true
        # convolution; otherwise the echoes would precede the dry signal.
        kernel = impulse.flip(0).view(1, 1, -1)
        # Fold all leading dimensions into the batch so every channel gets the same filter
        signal = audio.reshape(-1, 1, audio.shape[-1])
        # Left-pad only: this keeps the filter causal and the output length equal to the input
        signal = torch.nn.functional.pad(signal, (impulse_len - 1, 0))
        wet = torch.nn.functional.conv1d(signal, kernel)
        return wet.reshape(audio.shape)
    def process(self, audio):
        """Full processing pipeline: loudness normalisation followed by fades."""
        audio = self.normalize(audio)
        audio = self.fade_in_out(audio)
        return audio
 # Usage with MusicGen
 from audiocraft.models import MusicGen
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 model.set_generation_params(duration=10)
 wav = model.generate(["chill ambient music"])
 processor = AudioPostProcessor()
 wav_processed = processor.process(wav[0].cpu())
 torchaudio.save("processed.wav", wav_processed, sample_rate=32000)
 ```
 ## Evaluation
 ### Audio quality metrics
 ```python
 import torch
 from audiocraft.metrics import CLAPTextConsistencyMetric
 from audiocraft.data.audio import audio_read
 def evaluate_generation(audio_path, text_prompt):
    """Evaluate generated audio quality."""
    # Load audio
    wav, sr = audio_read(audio_path)
    # CLAP consistency (text-audio alignment)
    clap_metric = CLAPTextConsistencyMetric()
    clap_score = clap_metric.compute(wav, [text_prompt])
    return {
        "clap_score": clap_score,
        "duration": wav.shape[-1] / sr
    }
 # Batch evaluation
 def evaluate_batch(generations):
    """Evaluate multiple generations.
    Args:
        generations: iterable of {"path": str, "prompt": str}
    Returns:
        {"individual": [...], "average_clap": float or None}; the average is
        None when ``generations`` is empty.
    """
    results = []
    for gen in generations:
        result = evaluate_generation(gen["path"], gen["prompt"])
        result["prompt"] = gen["prompt"]
        results.append(result)
    # Aggregate; an empty batch has no mean, so report None instead of dividing by zero
    avg_clap = sum(r["clap_score"] for r in results) / len(results) if results else None
    return {
        "individual": results,
        "average_clap": avg_clap
    }
 ```
 ## Model Comparison
 ### MusicGen variants benchmark
 | Model | CLAP Score | Generation Time (10s) | VRAM (FP16) |
 |-------|------------|----------------------|------|
 | musicgen-small | 0.35 | ~5s | 2GB |
 | musicgen-medium | 0.42 | ~15s | 4GB |
 | musicgen-large | 0.48 | ~30s | 8GB |
 | musicgen-melody | 0.45 | ~15s | 4GB |
 | musicgen-stereo-medium | 0.41 | ~18s | 5GB |
 ### Prompt engineering tips
 ```python
 # Good prompts - specific and descriptive
 good_prompts = [
    "upbeat electronic dance music with synthesizer leads and punchy drums at 128 bpm",
    "melancholic piano ballad with strings, slow tempo, emotional and cinematic",
    "funky disco groove with slap bass, brass section, and rhythmic guitar"
 ]
 # Bad prompts - too vague
 bad_prompts = [
    "nice music",
    "song",
    "good beat"
 ]
 # Structure: [mood] [genre] with [instruments] at [tempo/style]
 ```
--- a/skills/mlops/audiocraft/references/troubleshooting.md
+++ b/skills/mlops/audiocraft/references/troubleshooting.md
@ -0,0 +1,504 @@
 # AudioCraft Troubleshooting Guide
 ## Installation Issues
 ### Import errors
 **Error**: `ModuleNotFoundError: No module named 'audiocraft'`
 **Solutions**:
 ```bash
 # Install from PyPI
 pip install audiocraft
 # Or from GitHub
 pip install git+https://github.com/facebookresearch/audiocraft.git
 # Verify installation
 python -c "from audiocraft.models import MusicGen; print('OK')"
 ```
 ### FFmpeg not found
 **Error**: `RuntimeError: ffmpeg not found`
 **Solutions**:
 ```bash
 # Ubuntu/Debian
 sudo apt-get install ffmpeg
 # macOS
 brew install ffmpeg
 # Windows (using conda)
 conda install -c conda-forge ffmpeg
 # Verify
 ffmpeg -version
 ```
 ### PyTorch CUDA mismatch
 **Error**: `RuntimeError: CUDA error: no kernel image is available`
 **Solutions**:
 ```bash
 # Check CUDA version
 nvcc --version
 python -c "import torch; print(torch.version.cuda)"
 # Install matching PyTorch
 pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu121
 # For CUDA 11.8
 pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu118
 ```
 ### xformers issues
 **Error**: `ImportError: xformers` related errors
 **Solutions**:
 ```bash
 # Install xformers for memory efficiency
 pip install xformers
 # Or disable xformers
 export AUDIOCRAFT_USE_XFORMERS=0
 ```
 To disable it from Python instead, set the variable before importing audiocraft:
 ```python
 import os
 os.environ["AUDIOCRAFT_USE_XFORMERS"] = "0"
 from audiocraft.models import MusicGen
 ```
 ## Model Loading Issues
 ### Out of memory during load
 **Error**: `torch.cuda.OutOfMemoryError` during model loading
 **Solutions**:
 ```python
 # Use smaller model
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Fall back to CPU when the GPU cannot hold the model.
 # The device is chosen at load time via `device=`; the MusicGen wrapper
 # is not an nn.Module and does not provide a `.to()` method.
 import torch
 device = "cpu"
 model = MusicGen.get_pretrained('facebook/musicgen-small', device=device)
 # Use HuggingFace with device_map
 from transformers import MusicgenForConditionalGeneration
 model = MusicgenForConditionalGeneration.from_pretrained(
    "facebook/musicgen-small",
    device_map="auto"
 )
 ```
 ### Download failures
 **Error**: Connection errors or incomplete downloads
 **Solutions**:
 ```python
 # Set cache directory
 import os
 os.environ["AUDIOCRAFT_CACHE_DIR"] = "/path/to/cache"
 # Or for HuggingFace
 os.environ["HF_HOME"] = "/path/to/hf_cache"
 # Resume download
 from huggingface_hub import snapshot_download
 snapshot_download("facebook/musicgen-small", resume_download=True)
 # Use local files
 model = MusicGen.get_pretrained('/local/path/to/model')
 ```
 ### Wrong model type
 **Error**: Loading wrong model for task
 **Solutions**:
 ```python
 # For text-to-music: use MusicGen
 from audiocraft.models import MusicGen
 model = MusicGen.get_pretrained('facebook/musicgen-medium')
 # For text-to-sound: use AudioGen
 from audiocraft.models import AudioGen
 model = AudioGen.get_pretrained('facebook/audiogen-medium')
 # For melody conditioning: use melody variant
 model = MusicGen.get_pretrained('facebook/musicgen-melody')
 # For stereo: use stereo variant
 model = MusicGen.get_pretrained('facebook/musicgen-stereo-medium')
 ```
 ## Generation Issues
 ### Empty or silent output
 **Problem**: Generated audio is silent or very quiet
 **Solutions**:
 ```python
 import torch
 # Check output
 wav = model.generate(["upbeat music"])
 print(f"Shape: {wav.shape}")
 print(f"Max amplitude: {wav.abs().max().item()}")
 print(f"Mean amplitude: {wav.abs().mean().item()}")
 # If too quiet, normalize
 def normalize_audio(audio, target_db=-14.0):
    """Rescale ``audio`` so that its RMS level equals ``target_db``.
    Args:
        audio: Tensor of samples; the RMS is taken over all elements.
        target_db: Desired RMS level in dB, converted to a linear
            amplitude with ``10 ** (dB / 20)``.
    Returns:
        ``audio`` multiplied by the gain that reaches the target RMS.
    """
    current_rms = torch.sqrt(torch.mean(audio ** 2))
    desired_rms = 10 ** (target_db / 20)
    # The small epsilon keeps the division finite for all-zero input.
    scale = desired_rms / (current_rms + 1e-8)
    return audio * scale
 wav_normalized = normalize_audio(wav)
 ```
 ### Poor quality output
 **Problem**: Generated music sounds bad or noisy
 **Solutions**:
 ```python
 # Use larger model
 model = MusicGen.get_pretrained('facebook/musicgen-large')
 # Adjust generation parameters
 model.set_generation_params(
    duration=15,
    top_k=250,          # Increase for more diversity
    temperature=0.8,    # Lower for more focused output
    cfg_coef=4.0        # Increase for better text adherence
 )
 # Use better prompts
 # Bad: "music"
 # Good: "upbeat electronic dance music with synthesizers and punchy drums"
 # Try MultiBand Diffusion
 from audiocraft.models import MultiBandDiffusion
 mbd = MultiBandDiffusion.get_mbd_musicgen()
 # generate() returns (wav, tokens) when return_tokens=True; the tokens are
 # then decoded by the diffusion model instead of the default decoder.
 wav, tokens = model.generate(["prompt"], return_tokens=True)
 wav = mbd.tokens_to_wav(tokens)
 ```
 ### Generation too short
 **Problem**: Audio shorter than expected
 **Solutions**:
 ```python
 # Check duration setting
 model.set_generation_params(duration=30)  # Set before generate
 # Verify the setting (the duration is stored on `model.duration`;
 # `model.generation_params` only holds the sampling options)
 print(f"Duration setting: {model.duration}s")
 # Check output shape
 wav = model.generate(["prompt"])
 actual_duration = wav.shape[-1] / 32000
 print(f"Actual duration: {actual_duration}s")
 # Note: max duration is typically 30s
 ```
 ### Melody conditioning fails
 **Error**: Issues with melody-conditioned generation
 **Solutions**:
 ```python
 import torchaudio
 from audiocraft.models import MusicGen
 # Load melody model (not base model)
 model = MusicGen.get_pretrained('facebook/musicgen-melody')
 # Load and prepare melody
 melody, sr = torchaudio.load("melody.wav")
 # Resample to model sample rate if needed
 if sr != 32000:
    resampler = torchaudio.transforms.Resample(sr, 32000)
    melody = resampler(melody)
 # Ensure correct shape [batch, channels, samples]
 if melody.dim() == 1:
    melody = melody.unsqueeze(0).unsqueeze(0)
 elif melody.dim() == 2:
    melody = melody.unsqueeze(0)
 # Convert stereo to mono
 if melody.shape[1] > 1:
    melody = melody.mean(dim=1, keepdim=True)
 # Generate with melody
 model.set_generation_params(duration=min(melody.shape[-1] / 32000, 30))
 wav = model.generate_with_chroma(["piano cover"], melody, 32000)
 ```
 ## Memory Issues
 ### CUDA out of memory
 **Error**: `torch.cuda.OutOfMemoryError: CUDA out of memory`
 **Solutions**:
 ```python
 import torch
 # Clear cache before generation
 torch.cuda.empty_cache()
 # Use smaller model
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Reduce duration
 model.set_generation_params(duration=10)  # Instead of 30
 # Generate one at a time
 for prompt in prompts:
    wav = model.generate([prompt])
    save_audio(wav)
    torch.cuda.empty_cache()
 # Use CPU for very large generations
 model = MusicGen.get_pretrained('facebook/musicgen-small', device="cpu")
 ```
 ### Memory leak during batch processing
 **Problem**: Memory grows over time
 **Solutions**:
 ```python
 import gc
 import torch
 def generate_with_cleanup(model, prompts):
    """Generate one clip per prompt and release memory after each step.
    Args:
        model: Object whose ``generate`` takes a list of prompts and
            returns a tensor.
        prompts: Iterable of prompt strings, processed one at a time.
    Returns:
        list with the CPU copy of each generated tensor, in prompt order.
    """
    collected = []
    for text in prompts:
        with torch.no_grad():
            clip = model.generate([text])
            collected.append(clip.cpu())
        # Drop the local reference, then run the garbage collector and
        # empty the CUDA cache before the next prompt.
        del clip
        gc.collect()
        torch.cuda.empty_cache()
    return collected
 # Use context manager
 with torch.inference_mode():
    wav = model.generate(["prompt"])
 ```
 ## Audio Format Issues
 ### Wrong sample rate
 **Problem**: Audio plays at wrong speed
 **Solutions**:
 ```python
 import torchaudio
 # MusicGen outputs at 32kHz, AudioGen outputs at 16kHz.
 # Read the rate from the loaded model instead of hard-coding it, so the
 # value always matches the model that produced `wav`.
 sample_rate = model.sample_rate
 # Always use correct rate when saving
 torchaudio.save("output.wav", wav[0].cpu(), sample_rate=sample_rate)
 # Resample if needed
 resampler = torchaudio.transforms.Resample(32000, 44100)
 wav_resampled = resampler(wav)
 ```
 ### Stereo/mono mismatch
 **Problem**: Wrong number of channels
 **Solutions**:
 ```python
 # Check model type
 print(f"Audio channels: {wav.shape}")
 # Mono: [batch, 1, samples]
 # Stereo: [batch, 2, samples]
 # Convert mono to stereo
 if wav.shape[1] == 1:
    wav_stereo = wav.repeat(1, 2, 1)
 # Convert stereo to mono
 if wav.shape[1] == 2:
    wav_mono = wav.mean(dim=1, keepdim=True)
 # Use stereo model for stereo output
 model = MusicGen.get_pretrained('facebook/musicgen-stereo-medium')
 ```
 ### Clipping and distortion
 **Problem**: Audio has clipping or distortion
 **Solutions**:
 ```python
 import torch
 # Check for clipping
 max_val = wav.abs().max().item()
 print(f"Max amplitude: {max_val}")
 # Normalize to prevent clipping
 if max_val > 1.0:
    wav = wav / max_val
 # Apply soft clipping
 def soft_clip(x, threshold=0.9):
    """Squash ``x`` smoothly with tanh so its magnitude stays below ``threshold``.
    Args:
        x: Tensor of samples.
        threshold: Scale applied before and after the tanh; the output
            approaches +/- ``threshold`` for large inputs.
    Returns:
        ``tanh(x / threshold) * threshold``.
    """
    scaled = x / threshold
    return torch.tanh(scaled) * threshold
 wav_clipped = soft_clip(wav)
 # Lower temperature during generation
 model.set_generation_params(temperature=0.7)  # More controlled
 ```
 ## HuggingFace Transformers Issues
 ### Processor errors
 **Error**: Issues with MusicgenProcessor
 **Solutions**:
 ```python
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 # Load matching processor and model
 processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
 model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
 # Ensure inputs are on the same device as the model
 # (the model above is loaded on CPU unless it is moved explicitly)
 inputs = processor(
    text=["prompt"],
    padding=True,
    return_tensors="pt"
 ).to(model.device)
 # Check processor configuration
 print(processor.tokenizer)
 print(processor.feature_extractor)
 ```
 ### Generation parameter errors
 **Error**: Invalid generation parameters
 **Solutions**:
 ```python
 # HuggingFace uses different parameter names
 audio_values = model.generate(
    **inputs,
    do_sample=True,           # Enable sampling
    guidance_scale=3.0,       # CFG (not cfg_coef)
    max_new_tokens=256,       # Token limit (not duration)
    temperature=1.0
 )
 # Calculate tokens from duration
 # ~50 tokens per second
 duration_seconds = 10
 max_tokens = duration_seconds * 50
 audio_values = model.generate(**inputs, max_new_tokens=max_tokens)
 ```
 ## Performance Issues
 ### Slow generation
 **Problem**: Generation takes too long
 **Solutions**:
 ```python
 # Use smaller model
 model = MusicGen.get_pretrained('facebook/musicgen-small')
 # Reduce duration
 model.set_generation_params(duration=10)
 # Use GPU (the device is chosen at load time; the MusicGen wrapper does not provide .to())
 model = MusicGen.get_pretrained('facebook/musicgen-small', device="cuda")
 # Enable flash attention if available
 # (requires compatible hardware)
 # Batch multiple prompts
 prompts = ["prompt1", "prompt2", "prompt3"]
 wav = model.generate(prompts)  # Single batch is faster than loop
 # Use compile (PyTorch 2.0+)
 model.lm = torch.compile(model.lm)
 ```
 ### CPU fallback
 **Problem**: Generation running on CPU instead of GPU
 **Solutions**:
 ```python
 import torch
 # Check CUDA availability
 print(f"CUDA available: {torch.cuda.is_available()}")
 # Only query the device name when a GPU is visible; the call raises otherwise
 if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
 # Explicitly load on GPU (the device is chosen at load time via `device=`)
 model = MusicGen.get_pretrained('facebook/musicgen-small', device="cuda")
 # Verify model device
 print(f"Model device: {next(model.lm.parameters()).device}")
 ```
 ## Common Error Messages
 | Error | Cause | Solution |
 |-------|-------|----------|
 | `CUDA out of memory` | Model too large | Use smaller model, reduce duration |
 | `ffmpeg not found` | FFmpeg not installed | Install FFmpeg |
 | `No module named 'audiocraft'` | Not installed | `pip install audiocraft` |
 | `RuntimeError: Expected 3D tensor` | Wrong input shape | Check tensor dimensions |
 | `KeyError: 'melody'` | Wrong model for melody | Use musicgen-melody |
 | `Sample rate mismatch` | Wrong audio format | Resample to model rate |
 ## Getting Help
 1. **GitHub Issues**: https://github.com/facebookresearch/audiocraft/issues
 2. **HuggingFace Forums**: https://discuss.huggingface.co
 3. **Paper**: https://arxiv.org/abs/2306.05284
 ### Reporting Issues
 Include:
 - Python version
 - PyTorch version
 - CUDA version
 - AudioCraft version: `pip show audiocraft`
 - Full error traceback
 - Minimal reproducible code
 - Hardware (GPU model, VRAM)
--- a/skills/mlops/axolotl/SKILL.md
+++ b/skills/mlops/axolotl/SKILL.md
@ -0,0 +1,158 @@
 ---
 name: axolotl
 description: Expert guidance for fine-tuning LLMs with Axolotl - YAML configs, 100+ models, LoRA/QLoRA, DPO/KTO/ORPO/GRPO, multimodal support
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Fine-Tuning, Axolotl, LLM, LoRA, QLoRA, DPO, KTO, ORPO, GRPO, YAML, HuggingFace, DeepSpeed, Multimodal]
 dependencies: [axolotl, torch, transformers, datasets, peft, accelerate, deepspeed]
 ---
 # Axolotl Skill
 Comprehensive assistance with axolotl development, generated from official documentation.
 ## When to Use This Skill
 This skill should be triggered when:
 - Working with axolotl
 - Asking about axolotl features or APIs
 - Implementing axolotl solutions
 - Debugging axolotl code
 - Learning axolotl best practices
 ## Quick Reference
 ### Common Patterns
 **Pattern 1:** To validate that acceptable data transfer speeds exist for your training job, running NCCL Tests can help pinpoint bottlenecks, for example:
 ```
 ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 3
 ```
 **Pattern 2:** Configure your model to use FSDP in the Axolotl yaml. For example:
 ```
 fsdp_version: 2
 fsdp_config:
  offload_params: true
  state_dict_type: FULL_STATE_DICT
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: LlamaDecoderLayer
  reshard_after_forward: true
 ```
 **Pattern 3:** The context_parallel_size should be a divisor of the total number of GPUs. For example:
 ```
 context_parallel_size
 ```
 **Pattern 4:** Sequence parallelism reduces the number of distinct batches processed per step. For example:
 - With 8 GPUs and no sequence parallelism: 8 different batches processed per step
 - With 8 GPUs and context_parallel_size=4: only 2 different batches processed per step (each split across 4 GPUs)
 - If your per-GPU micro_batch_size is 2, the global batch size decreases from 16 to 4
 ```
 context_parallel_size=4
 ```
 **Pattern 5:** Setting `save_compressed: true` in your configuration enables saving models in a compressed format, which:
 - Reduces disk space usage by approximately 40%
 - Maintains compatibility with vLLM for accelerated inference
 - Maintains compatibility with llmcompressor for further optimization (example: quantization)
 ```
 save_compressed: true
 ```
 **Pattern 6:** Note: it is not necessary to place your integration in the `integrations` folder. It can be in any location, so long as it’s installed in a package in your Python environment. See this repo for an example: https://github.com/axolotl-ai-cloud/diff-transformer
 ```
 integrations
 ```
 **Pattern 7:** Handle both single-example and batched data:
 - single example: `sample['input_ids']` is a `list[int]`
 - batched data: `sample['input_ids']` is a `list[list[int]]`
 ```
 utils.trainer.drop_long_seq(sample, sequence_len=2048, min_sequence_len=2)
 ```
 ### Example Code Patterns
 **Example 1** (python):
 ```python
 cli.cloud.modal_.ModalCloud(config, app=None)
 ```
 **Example 2** (python):
 ```python
 cli.cloud.modal_.run_cmd(cmd, run_folder, volumes=None)
 ```
 **Example 3** (python):
 ```python
 core.trainers.base.AxolotlTrainer(
    *_args,
    bench_data_collator=None,
    eval_data_collator=None,
    dataset_tags=None,
    **kwargs,
 )
 ```
 **Example 4** (python):
 ```python
 core.trainers.base.AxolotlTrainer.log(logs, start_time=None)
 ```
 **Example 5** (python):
 ```python
 prompt_strategies.input_output.RawInputOutputPrompter()
 ```
 ## Reference Files
 This skill includes comprehensive documentation in `references/`:
 - **api.md** - API documentation
 - **dataset-formats.md** - Dataset-Formats documentation
 - **other.md** - Other documentation
 Use `view` to read specific reference files when detailed information is needed.
 ## Working with This Skill
 ### For Beginners
 Start with `references/index.md`, which lists the available reference files (`api.md`, `dataset-formats.md`, `other.md`), then read the one that matches your topic.
 ### For Specific Features
 Use the appropriate category reference file (`api.md`, `dataset-formats.md`, or `other.md`) for detailed information.
 ### For Code Examples
 The quick reference section above contains common patterns extracted from the official docs.
 ## Resources
 ### references/
 Organized documentation extracted from official sources. These files contain:
 - Detailed explanations
 - Code examples with language annotations
 - Links to original documentation
 - Table of contents for quick navigation
 ### scripts/
 Add helper scripts here for common automation tasks.
 ### assets/
 Add templates, boilerplate, or example projects here.
 ## Notes
 - This skill was automatically generated from official documentation
 - Reference files preserve the structure and examples from source docs
 - Code examples include language detection for better syntax highlighting
 - Quick reference patterns are extracted from common usage examples in the docs
 ## Updating
 To refresh this skill with updated documentation:
 1. Re-run the scraper with the same configuration
 2. The skill will be rebuilt with the latest information
--- a/skills/mlops/axolotl/references/api.md
+++ b/skills/mlops/axolotl/references/api.md
--- a/skills/mlops/axolotl/references/dataset-formats.md
+++ b/skills/mlops/axolotl/references/dataset-formats.md
--- a/skills/mlops/axolotl/references/index.md
+++ b/skills/mlops/axolotl/references/index.md
@ -0,0 +1,15 @@
 # Axolotl Documentation Index
 ## Categories
 ### Api
 **File:** `api.md`
 **Pages:** 150
 ### Dataset-Formats
 **File:** `dataset-formats.md`
 **Pages:** 9
 ### Other
 **File:** `other.md`
 **Pages:** 26
--- a/skills/mlops/axolotl/references/other.md
+++ b/skills/mlops/axolotl/references/other.md
--- a/skills/mlops/chroma/SKILL.md
+++ b/skills/mlops/chroma/SKILL.md
@ -0,0 +1,406 @@
 ---
 name: chroma
 description: Open-source embedding database for AI applications. Store embeddings and metadata, perform vector and full-text search, filter by metadata. Simple 4-function API. Scales from notebooks to production clusters. Use for semantic search, RAG applications, or document retrieval. Best for local development and open-source projects.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [RAG, Chroma, Vector Database, Embeddings, Semantic Search, Open Source, Self-Hosted, Document Retrieval, Metadata Filtering]
 dependencies: [chromadb, sentence-transformers]
 ---
 # Chroma - Open-Source Embedding Database
 The AI-native database for building LLM applications with memory.
 ## When to use Chroma
 **Use Chroma when:**
 - Building RAG (retrieval-augmented generation) applications
 - Need local/self-hosted vector database
 - Want open-source solution (Apache 2.0)
 - Prototyping in notebooks
 - Semantic search over documents
 - Storing embeddings with metadata
 **Metrics**:
 - **24,300+ GitHub stars**
 - **1,900+ forks**
 - **v1.3.3** (stable, weekly releases)
 - **Apache 2.0 license**
 **Use alternatives instead**:
 - **Pinecone**: Managed cloud, auto-scaling
 - **FAISS**: Pure similarity search, no metadata
 - **Weaviate**: Production ML-native database
 - **Qdrant**: High performance, Rust-based
 ## Quick start
 ### Installation
 ```bash
 # Python
 pip install chromadb
 # JavaScript/TypeScript
 npm install chromadb @chroma-core/default-embed
 ```
 ### Basic usage (Python)
 ```python
 import chromadb
 # Create client
 client = chromadb.Client()
 # Create collection
 collection = client.create_collection(name="my_collection")
 # Add documents
 collection.add(
    documents=["This is document 1", "This is document 2"],
    metadatas=[{"source": "doc1"}, {"source": "doc2"}],
    ids=["id1", "id2"]
 )
 # Query
 results = collection.query(
    query_texts=["document about topic"],
    n_results=2
 )
 print(results)
 ```
 ## Core operations
 ### 1. Create collection
 ```python
 # Simple collection
 collection = client.create_collection("my_docs")
 # With custom embedding function
 from chromadb.utils import embedding_functions
 openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key="your-key",
    model_name="text-embedding-3-small"
 )
 collection = client.create_collection(
    name="my_docs",
    embedding_function=openai_ef
 )
 # Get existing collection
 collection = client.get_collection("my_docs")
 # Delete collection
 client.delete_collection("my_docs")
 ```
 ### 2. Add documents
 ```python
 # Add documents with metadata (an explicit, unique ID is supplied for each document)
 collection.add(
    documents=["Doc 1", "Doc 2", "Doc 3"],
    metadatas=[
        {"source": "web", "category": "tutorial"},
        {"source": "pdf", "page": 5},
        {"source": "api", "timestamp": "2025-01-01"}
    ],
    ids=["id1", "id2", "id3"]
 )
 # Add with custom embeddings
 collection.add(
    embeddings=[[0.1, 0.2, ...], [0.3, 0.4, ...]],
    documents=["Doc 1", "Doc 2"],
    ids=["id1", "id2"]
 )
 ```
 ### 3. Query (similarity search)
 ```python
 # Basic query
 results = collection.query(
    query_texts=["machine learning tutorial"],
    n_results=5
 )
 # Query with filters
 results = collection.query(
    query_texts=["Python programming"],
    n_results=3,
    where={"source": "web"}
 )
 # Query with metadata filters
 results = collection.query(
    query_texts=["advanced topics"],
    where={
        "$and": [
            {"category": "tutorial"},
            {"difficulty": {"$gte": 3}}
        ]
    }
 )
 # Access results
 print(results["documents"])      # List of matching documents
 print(results["metadatas"])      # Metadata for each doc
 print(results["distances"])      # Distances to the query (smaller = more similar)
 print(results["ids"])            # Document IDs
 ```
 ### 4. Get documents
 ```python
 # Get by IDs
 docs = collection.get(
    ids=["id1", "id2"]
 )
 # Get with filters
 docs = collection.get(
    where={"category": "tutorial"},
    limit=10
 )
 # Get all documents
 docs = collection.get()
 ```
 ### 5. Update documents
 ```python
 # Update document content
 collection.update(
    ids=["id1"],
    documents=["Updated content"],
    metadatas=[{"source": "updated"}]
 )
 ```
 ### 6. Delete documents
 ```python
 # Delete by IDs
 collection.delete(ids=["id1", "id2"])
 # Delete with filter
 collection.delete(
    where={"source": "outdated"}
 )
 ```
 ## Persistent storage
 ```python
 # Persist to disk
 client = chromadb.PersistentClient(path="./chroma_db")
 collection = client.create_collection("my_docs")
 collection.add(documents=["Doc 1"], ids=["id1"])
 # Data persisted automatically
 # Reload later with same path
 client = chromadb.PersistentClient(path="./chroma_db")
 collection = client.get_collection("my_docs")
 ```
 ## Embedding functions
 ### Default (Sentence Transformers)
 ```python
 # Uses sentence-transformers by default
 collection = client.create_collection("my_docs")
 # Default model: all-MiniLM-L6-v2
 ```
 ### OpenAI
 ```python
 from chromadb.utils import embedding_functions
 openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key="your-key",
    model_name="text-embedding-3-small"
 )
 collection = client.create_collection(
    name="openai_docs",
    embedding_function=openai_ef
 )
 ```
 ### HuggingFace
 ```python
 huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
    api_key="your-key",
    model_name="sentence-transformers/all-mpnet-base-v2"
 )
 collection = client.create_collection(
    name="hf_docs",
    embedding_function=huggingface_ef
 )
 ```
 ### Custom embedding function
 ```python
 from chromadb import Documents, EmbeddingFunction, Embeddings
 class MyEmbeddingFunction(EmbeddingFunction):
    """Skeleton for a custom embedding function passed to ``create_collection``."""
    def __call__(self, input: Documents) -> Embeddings:
        # Your embedding logic
        # NOTE: `embeddings` is a placeholder that this snippet never defines;
        # compute it from `input` here before returning it.
        return embeddings
 my_ef = MyEmbeddingFunction()
 collection = client.create_collection(
    name="custom_docs",
    embedding_function=my_ef
 )
 ```
 ## Metadata filtering
 ```python
 # Exact match
 results = collection.query(
    query_texts=["query"],
    where={"category": "tutorial"}
 )
 # Comparison operators
 results = collection.query(
    query_texts=["query"],
    where={"page": {"$gt": 10}}  # $gt, $gte, $lt, $lte, $ne
 )
 # Logical operators
 results = collection.query(
    query_texts=["query"],
    where={
        "$and": [
            {"category": "tutorial"},
            {"difficulty": {"$lte": 3}}
        ]
    }  # Also: $or
 )
 # Membership ($in): matches when the metadata value is one of the listed values
 results = collection.query(
    query_texts=["query"],
    where={"tags": {"$in": ["python", "ml"]}}
 )
 ```
 ## LangChain integration
 ```python
 from langchain_chroma import Chroma
 from langchain_openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 # Split documents
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
 docs = text_splitter.split_documents(documents)
 # Create Chroma vector store
 vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
    persist_directory="./chroma_db"
 )
 # Query
 results = vectorstore.similarity_search("machine learning", k=3)
 # As retriever
 retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
 ```
 ## LlamaIndex integration
 ```python
 from llama_index.vector_stores.chroma import ChromaVectorStore
 from llama_index.core import VectorStoreIndex, StorageContext
 import chromadb
 # Initialize Chroma
 db = chromadb.PersistentClient(path="./chroma_db")
 collection = db.get_or_create_collection("my_collection")
 # Create vector store
 vector_store = ChromaVectorStore(chroma_collection=collection)
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
 # Create index
 index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
 )
 # Query
 query_engine = index.as_query_engine()
 response = query_engine.query("What is machine learning?")
 ```
 ## Server mode
 ```python
 # Run Chroma server
 # Terminal: chroma run --path ./chroma_db --port 8000
 # Connect to server
 import chromadb
 from chromadb.config import Settings
 client = chromadb.HttpClient(
    host="localhost",
    port=8000,
    settings=Settings(anonymized_telemetry=False)
 )
 # Use as normal
 collection = client.get_or_create_collection("my_docs")
 ```
 ## Best practices
 1. **Use persistent client** - Don't lose data on restart
 2. **Add metadata** - Enables filtering and tracking
 3. **Batch operations** - Add multiple docs at once
 4. **Choose right embedding model** - Balance speed/quality
 5. **Use filters** - Narrow search space
 6. **Unique IDs** - Avoid collisions
 7. **Regular backups** - Copy chroma_db directory
 8. **Monitor collection size** - Scale up if needed
 9. **Test embedding functions** - Ensure quality
 10. **Use server mode for production** - Better for multi-user
 ## Performance
 | Operation | Latency | Notes |
 |-----------|---------|-------|
 | Add 100 docs | ~1-3s | With embedding |
 | Query (top 10) | ~50-200ms | Depends on collection size |
 | Metadata filter | ~10-50ms | Fast with proper indexing |
 ## Resources
 - **GitHub**: https://github.com/chroma-core/chroma ⭐ 24,300+
 - **Docs**: https://docs.trychroma.com
 - **Discord**: https://discord.gg/MMeYNTmh3x
 - **Version**: 1.3.3+
 - **License**: Apache 2.0
--- a/skills/mlops/chroma/references/integration.md
+++ b/skills/mlops/chroma/references/integration.md
@ -0,0 +1,38 @@
 # Chroma Integration Guide
 Integration with LangChain, LlamaIndex, and frameworks.
 ## LangChain
 ```python
 from langchain_chroma import Chroma
 from langchain_openai import OpenAIEmbeddings
 vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
    persist_directory="./chroma_db"
 )
 # Query
 results = vectorstore.similarity_search("query", k=3)
 # As retriever
 retriever = vectorstore.as_retriever()
 ```
 ## LlamaIndex
 ```python
 from llama_index.vector_stores.chroma import ChromaVectorStore
 import chromadb
 db = chromadb.PersistentClient(path="./chroma_db")
 collection = db.get_or_create_collection("docs")
 vector_store = ChromaVectorStore(chroma_collection=collection)
 ```
 ## Resources
 - **Docs**: https://docs.trychroma.com
--- a/skills/mlops/clip/SKILL.md
+++ b/skills/mlops/clip/SKILL.md
@ -0,0 +1,253 @@
 ---
 name: clip
 description: OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. Best for general-purpose image understanding.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Multimodal, CLIP, Vision-Language, Zero-Shot, Image Classification, OpenAI, Image Search, Cross-Modal Retrieval, Content Moderation]
 dependencies: [transformers, torch, pillow]
 ---
 # CLIP - Contrastive Language-Image Pre-Training
 OpenAI's model that understands images from natural language.
 ## When to use CLIP
 **Use when:**
 - Zero-shot image classification (no training data needed)
 - Image-text similarity/matching
 - Semantic image search
 - Content moderation (detect NSFW, violence)
 - Visual question answering
 - Cross-modal retrieval (image→text, text→image)
 **Metrics**:
 - **25,300+ GitHub stars**
 - Trained on 400M image-text pairs
 - Matches ResNet-50 on ImageNet (zero-shot)
 - MIT License
 **Use alternatives instead**:
 - **BLIP-2**: Better captioning
 - **LLaVA**: Vision-language chat
 - **Segment Anything**: Image segmentation
 ## Quick start
 ### Installation
 ```bash
 pip install git+https://github.com/openai/CLIP.git
 pip install torch torchvision ftfy regex tqdm
 ```
 ### Zero-shot classification
 ```python
 import torch
 import clip
 from PIL import Image
 # Load model
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = clip.load("ViT-B/32", device=device)
 # Load image
 image = preprocess(Image.open("photo.jpg")).unsqueeze(0).to(device)
 # Define possible labels
 text = clip.tokenize(["a dog", "a cat", "a bird", "a car"]).to(device)
 # Compute similarity
 with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # Cosine similarity
    logits_per_image, logits_per_text = model(image, text)
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()
 # Print results
 labels = ["a dog", "a cat", "a bird", "a car"]
 for label, prob in zip(labels, probs[0]):
    print(f"{label}: {prob:.2%}")
 ```
 ## Available models
 ```python
 # Models (sorted by size)
 models = [
    "RN50",           # ResNet-50
    "RN101",          # ResNet-101
    "ViT-B/32",       # Vision Transformer (recommended)
    "ViT-B/16",       # Better quality, slower
    "ViT-L/14",       # Best quality, slowest
 ]
 model, preprocess = clip.load("ViT-B/32")
 ```
 | Model | Parameters | Speed | Quality |
 |-------|------------|-------|---------|
 | RN50 | 102M | Fast | Good |
 | ViT-B/32 | 151M | Medium | Better |
 | ViT-L/14 | 428M | Slow | Best |
 ## Image-text similarity
 ```python
 # Compute embeddings
 image_features = model.encode_image(image)
 text_features = model.encode_text(text)
 # Normalize
 image_features /= image_features.norm(dim=-1, keepdim=True)
 text_features /= text_features.norm(dim=-1, keepdim=True)
 # Cosine similarity
 similarity = (image_features @ text_features.T).item()
 print(f"Similarity: {similarity:.4f}")
 ```
 ## Semantic image search
 ```python
 # Index images
 image_paths = ["img1.jpg", "img2.jpg", "img3.jpg"]
 image_embeddings = []
 for img_path in image_paths:
    image = preprocess(Image.open(img_path)).unsqueeze(0).to(device)
    with torch.no_grad():
        embedding = model.encode_image(image)
        embedding /= embedding.norm(dim=-1, keepdim=True)
    image_embeddings.append(embedding)
 image_embeddings = torch.cat(image_embeddings)
 # Search with text query
 query = "a sunset over the ocean"
 text_input = clip.tokenize([query]).to(device)
 with torch.no_grad():
    text_embedding = model.encode_text(text_input)
    text_embedding /= text_embedding.norm(dim=-1, keepdim=True)
 # Find most similar images
 similarities = (text_embedding @ image_embeddings.T).squeeze(0)
 top_k = similarities.topk(3)
 for idx, score in zip(top_k.indices, top_k.values):
    print(f"{image_paths[idx]}: {score:.3f}")
 ```
 ## Content moderation
 ```python
 # Define categories
 categories = [
    "safe for work",
    "not safe for work",
    "violent content",
    "graphic content"
 ]
 text = clip.tokenize(categories).to(device)
 # Check image
 with torch.no_grad():
    logits_per_image, _ = model(image, text)
    probs = logits_per_image.softmax(dim=-1)
 # Get classification
 max_idx = probs.argmax().item()
 max_prob = probs[0, max_idx].item()
 print(f"Category: {categories[max_idx]} ({max_prob:.2%})")
 ```
 ## Batch processing
 ```python
 # Process multiple images
 images = [preprocess(Image.open(f"img{i}.jpg")) for i in range(10)]
 images = torch.stack(images).to(device)
 with torch.no_grad():
    image_features = model.encode_image(images)
    image_features /= image_features.norm(dim=-1, keepdim=True)
 # Batch text
 texts = ["a dog", "a cat", "a bird"]
 text_tokens = clip.tokenize(texts).to(device)
 with torch.no_grad():
    text_features = model.encode_text(text_tokens)
    text_features /= text_features.norm(dim=-1, keepdim=True)
 # Similarity matrix (10 images × 3 texts)
 similarities = image_features @ text_features.T
 print(similarities.shape)  # (10, 3)
 ```
 ## Integration with vector databases
 ```python
 # Store CLIP embeddings in Chroma/FAISS
 import chromadb
 client = chromadb.Client()
 collection = client.create_collection("image_embeddings")
 # Add image embeddings
 for img_path, embedding in zip(image_paths, image_embeddings):
    collection.add(
        embeddings=[embedding.cpu().numpy().tolist()],
        metadatas=[{"path": img_path}],
        ids=[img_path]
    )
 # Query with text
 query = "a sunset"
 # Encode on the model's device without tracking gradients (a tensor that
 # requires grad cannot be converted with .numpy()), and normalize the query
 # the same way the image embeddings were normalized when they were computed.
 with torch.no_grad():
    text_embedding = model.encode_text(clip.tokenize([query]).to(device))
    text_embedding /= text_embedding.norm(dim=-1, keepdim=True)
 results = collection.query(
    # text_embedding has one row per query, so tolist() is already a list of vectors
    query_embeddings=text_embedding.cpu().numpy().tolist(),
    n_results=5
 )
 ```
 ## Best practices
 1. **Use ViT-B/32 for most cases** - Good balance
 2. **Normalize embeddings** - Required for cosine similarity
 3. **Batch processing** - More efficient
 4. **Cache embeddings** - Expensive to recompute
 5. **Use descriptive labels** - Better zero-shot performance
 6. **GPU recommended** - 10-50× faster
 7. **Preprocess images** - Use provided preprocess function
 ## Performance
 | Operation | CPU | GPU (V100) |
 |-----------|-----|------------|
 | Image encoding | ~200ms | ~20ms |
 | Text encoding | ~50ms | ~5ms |
 | Similarity compute | <1ms | <1ms |
 ## Limitations
 1. **Not for fine-grained tasks** - Best for broad categories
 2. **Requires descriptive text** - Vague labels perform poorly
 3. **Biased on web data** - May have dataset biases
 4. **No bounding boxes** - Whole image only
 5. **Limited spatial understanding** - Position/counting weak
 ## Resources
 - **GitHub**: https://github.com/openai/CLIP ⭐ 25,300+
 - **Paper**: https://arxiv.org/abs/2103.00020
 - **Colab**: https://colab.research.google.com/github/openai/clip/
 - **License**: MIT
--- a/skills/mlops/clip/references/applications.md
+++ b/skills/mlops/clip/references/applications.md
@ -0,0 +1,207 @@
 # CLIP Applications Guide
 Practical applications and use cases for CLIP.
 ## Zero-shot image classification
 ```python
 import torch
 import clip
 from PIL import Image
 model, preprocess = clip.load("ViT-B/32")
 # Define categories
 categories = [
    "a photo of a dog",
    "a photo of a cat",
    "a photo of a bird",
    "a photo of a car",
    "a photo of a person"
 ]
 # Prepare image
 image = preprocess(Image.open("photo.jpg")).unsqueeze(0)
 text = clip.tokenize(categories)
 # Classify
 with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    logits_per_image, _ = model(image, text)
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()
 # Print results
 for category, prob in zip(categories, probs[0]):
    print(f"{category}: {prob:.2%}")
 ```
 ## Semantic image search
 ```python
 # Index images
 image_database = []
 image_paths = ["img1.jpg", "img2.jpg", "img3.jpg"]
 for img_path in image_paths:
    image = preprocess(Image.open(img_path)).unsqueeze(0)
    with torch.no_grad():
        features = model.encode_image(image)
        features /= features.norm(dim=-1, keepdim=True)
    image_database.append((img_path, features))
 # Search with text
 query = "a sunset over mountains"
 text_input = clip.tokenize([query])
 with torch.no_grad():
    text_features = model.encode_text(text_input)
    text_features /= text_features.norm(dim=-1, keepdim=True)
 # Find matches
 similarities = []
 for img_path, img_features in image_database:
    similarity = (text_features @ img_features.T).item()
    similarities.append((img_path, similarity))
 # Sort by similarity
 similarities.sort(key=lambda x: x[1], reverse=True)
 for img_path, score in similarities[:3]:
    print(f"{img_path}: {score:.3f}")
 ```
 ## Content moderation
 ```python
 # Define safety categories
 categories = [
    "safe for work content",
    "not safe for work content",
    "violent or graphic content",
    "hate speech or offensive content",
    "spam or misleading content"
 ]
 text = clip.tokenize(categories)
 # Check image
 with torch.no_grad():
    logits, _ = model(image, text)
    probs = logits.softmax(dim=-1)
 # Get classification
 max_idx = probs.argmax().item()
 confidence = probs[0, max_idx].item()
 if confidence > 0.7:
    print(f"Classified as: {categories[max_idx]} ({confidence:.2%})")
 else:
    print(f"Uncertain classification (confidence: {confidence:.2%})")
 ```
 ## Image-to-text retrieval
 ```python
 # Text database
 captions = [
    "A beautiful sunset over the ocean",
    "A cute dog playing in the park",
    "A modern city skyline at night",
    "A delicious pizza with toppings"
 ]
 # Encode captions
 caption_features = []
 for caption in captions:
    text = clip.tokenize([caption])
    with torch.no_grad():
        features = model.encode_text(text)
        features /= features.norm(dim=-1, keepdim=True)
    caption_features.append(features)
 caption_features = torch.cat(caption_features)
 # Find matching captions for image
 with torch.no_grad():
    image_features = model.encode_image(image)
    image_features /= image_features.norm(dim=-1, keepdim=True)
 similarities = (image_features @ caption_features.T).squeeze(0)
 top_k = similarities.topk(3)
 for idx, score in zip(top_k.indices, top_k.values):
    print(f"{captions[idx]}: {score:.3f}")
 ```
 ## Visual question answering
 ```python
 # Create yes/no questions as (statement, opposite) pairs.
 # Softmax only ranks prompts against each other, so every question needs its
 # own contrast prompt; a single softmax over unrelated statements can mark at
 # most one of them above 0.5.
 image = preprocess(Image.open("photo.jpg")).unsqueeze(0)
 questions = [
    ("a photo showing people", "a photo with no people"),
    ("a photo showing animals", "a photo with no animals"),
    ("a photo taken indoors", "a photo taken outdoors"),
    ("a photo taken during daytime", "a photo taken at night"),
 ]
 # Answer questions
 for statement, opposite in questions:
    text = clip.tokenize([statement, opposite])
    with torch.no_grad():
        logits, _ = model(image, text)
        probs = logits.softmax(dim=-1)
    # probs[0, 0] is the probability of the statement relative to its opposite
    prob = probs[0, 0].item()
    answer = "Yes" if prob > 0.5 else "No"
    print(f"{statement}: {answer} ({prob:.2%})")
 ```
 ## Image deduplication
 ```python
 # Detect duplicate/similar images
 def compute_similarity(img1_path, img2_path):
    img1 = preprocess(Image.open(img1_path)).unsqueeze(0)
    img2 = preprocess(Image.open(img2_path)).unsqueeze(0)
    with torch.no_grad():
        feat1 = model.encode_image(img1)
        feat2 = model.encode_image(img2)
        feat1 /= feat1.norm(dim=-1, keepdim=True)
        feat2 /= feat2.norm(dim=-1, keepdim=True)
        similarity = (feat1 @ feat2.T).item()
    return similarity
 # Check for duplicates
 threshold = 0.95
 image_pairs = [("img1.jpg", "img2.jpg"), ("img1.jpg", "img3.jpg")]
 for img1, img2 in image_pairs:
    sim = compute_similarity(img1, img2)
    if sim > threshold:
        print(f"{img1} and {img2} are duplicates (similarity: {sim:.3f})")
 ```
 ## Best practices
 1. **Use descriptive labels** - "a photo of X" works better than just "X"
 2. **Normalize embeddings** - Always normalize for cosine similarity
 3. **Batch processing** - Process multiple images/texts together
 4. **Cache embeddings** - Expensive to recompute
 5. **Set appropriate thresholds** - Test on validation data
 6. **Use GPU** - 10-50× faster than CPU
 7. **Consider model size** - ViT-B/32 good default, ViT-L/14 for best quality
 ## Resources
 - **Paper**: https://arxiv.org/abs/2103.00020
 - **GitHub**: https://github.com/openai/CLIP
 - **Colab**: https://colab.research.google.com/github/openai/clip/
--- a/skills/mlops/code-review/SKILL.md
+++ b/skills/mlops/code-review/SKILL.md
@ -0,0 +1,81 @@
 ---
 name: code-review
 description: Guidelines for performing thorough code reviews with security and quality focus
 ---
 # Code Review Skill
 Use this skill when reviewing code changes, pull requests, or auditing existing code.
 ## Review Checklist
 ### 1. Security First
 - [ ] No hardcoded secrets, API keys, or credentials
 - [ ] Input validation on all user-provided data
 - [ ] SQL queries use parameterized statements (no string concatenation)
 - [ ] File operations validate paths (no path traversal)
 - [ ] Authentication/authorization checks present where needed
 ### 2. Error Handling
 - [ ] All external calls (API, DB, file) have try/catch
 - [ ] Errors are logged with context (but no sensitive data)
 - [ ] User-facing errors are helpful but don't leak internals
 - [ ] Resources are cleaned up in finally blocks or context managers
 ### 3. Code Quality
 - [ ] Functions do one thing and are reasonably sized (<50 lines ideal)
 - [ ] Variable names are descriptive (no single letters except loops)
 - [ ] No commented-out code left behind
 - [ ] Complex logic has explanatory comments
 - [ ] No duplicate code (DRY principle)
 ### 4. Testing Considerations
 - [ ] Edge cases handled (empty inputs, nulls, boundaries)
 - [ ] Happy path and error paths both work
 - [ ] New code has corresponding tests (if test suite exists)
 ## Review Response Format
 When providing review feedback, structure it as:
 ```
 ## Summary
 [1-2 sentence overall assessment]
 ## Critical Issues (Must Fix)
 - Issue 1: [description + suggested fix]
 - Issue 2: ...
 ## Suggestions (Nice to Have)
 - Suggestion 1: [description]
 ## Questions
 - [Any clarifying questions about intent]
 ```
 ## Common Patterns to Flag
 ### Python
 ```python
 # Bad: SQL injection risk
 cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
 # Good: Parameterized query
 cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,))
 ```
 ### JavaScript
 ```javascript
 // Bad: XSS risk
 element.innerHTML = userInput;
 // Good: Safe text content
 element.textContent = userInput;
 ```
 ## Tone Guidelines
 - Be constructive, not critical
 - Explain *why* something is an issue, not just *what*
 - Offer solutions, not just problems
 - Acknowledge good patterns you see
--- a/skills/mlops/dspy/SKILL.md
+++ b/skills/mlops/dspy/SKILL.md
@ -0,0 +1,590 @@
 ---
 name: dspy
 description: Build complex AI systems with declarative programming, optimize prompts automatically, create modular RAG systems and agents with DSPy - Stanford NLP's framework for systematic LM programming
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Prompt Engineering, DSPy, Declarative Programming, RAG, Agents, Prompt Optimization, LM Programming, Stanford NLP, Automatic Optimization, Modular AI]
 dependencies: [dspy, openai, anthropic]
 ---
 # DSPy: Declarative Language Model Programming
 ## When to Use This Skill
 Use DSPy when you need to:
 - **Build complex AI systems** with multiple components and workflows
 - **Program LMs declaratively** instead of manual prompt engineering
 - **Optimize prompts automatically** using data-driven methods
 - **Create modular AI pipelines** that are maintainable and portable
 - **Improve model outputs systematically** with optimizers
 - **Build RAG systems, agents, or classifiers** with better reliability
 **GitHub Stars**: 22,000+ | **Created By**: Stanford NLP
 ## Installation
 ```bash
 # Stable release
 pip install dspy
 # Latest development version
 pip install git+https://github.com/stanfordnlp/dspy.git
 # With specific LM providers
 pip install dspy[openai]        # OpenAI
 pip install dspy[anthropic]     # Anthropic Claude
 pip install dspy[all]           # All providers
 ```
 ## Quick Start
 ### Basic Example: Question Answering
 ```python
 import dspy
 # Configure your language model
 lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
 dspy.settings.configure(lm=lm)
 # Define a signature (input → output)
 class QA(dspy.Signature):
    """Answer questions with short factual answers."""
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")
 # Create a module
 qa = dspy.Predict(QA)
 # Use it
 response = qa(question="What is the capital of France?")
 print(response.answer)  # "Paris"
 ```
 ### Chain of Thought Reasoning
 ```python
 import dspy
 lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
 dspy.settings.configure(lm=lm)
 # Use ChainOfThought for better reasoning
 class MathProblem(dspy.Signature):
    """Solve math word problems."""
    problem = dspy.InputField()
    answer = dspy.OutputField(desc="numerical answer")
 # ChainOfThought generates reasoning steps automatically
 cot = dspy.ChainOfThought(MathProblem)
 response = cot(problem="If John has 5 apples and gives 2 to Mary, how many does he have?")
 print(response.rationale)  # Shows reasoning steps
 print(response.answer)     # "3"
 ```
 ## Core Concepts
 ### 1. Signatures
 Signatures define the structure of your AI task (inputs → outputs):
 ```python
 # Inline signature (simple)
 qa = dspy.Predict("question -> answer")
 # Class signature (detailed)
 class Summarize(dspy.Signature):
    """Summarize text into key points."""
    text = dspy.InputField()
    summary = dspy.OutputField(desc="bullet points, 3-5 items")
 summarizer = dspy.ChainOfThought(Summarize)
 ```
 **When to use each:**
 - **Inline**: Quick prototyping, simple tasks
 - **Class**: Complex tasks, type hints, better documentation
 ### 2. Modules
 Modules are reusable components that transform inputs to outputs:
 #### dspy.Predict
 Basic prediction module:
 ```python
 predictor = dspy.Predict("context, question -> answer")
 result = predictor(context="Paris is the capital of France",
                   question="What is the capital?")
 ```
 #### dspy.ChainOfThought
 Generates reasoning steps before answering:
 ```python
 cot = dspy.ChainOfThought("question -> answer")
 result = cot(question="Why is the sky blue?")
 print(result.rationale)  # Reasoning steps
 print(result.answer)     # Final answer
 ```
 #### dspy.ReAct
 Agent-like reasoning with tools:
 ```python
 from dspy.predict import ReAct
 class SearchQA(dspy.Signature):
    """Answer questions using search."""
    question = dspy.InputField()
    answer = dspy.OutputField()
 def search_tool(query: str) -> str:
    """Search Wikipedia."""
    # Your search implementation
    return results
 react = ReAct(SearchQA, tools=[search_tool])
 result = react(question="When was Python created?")
 ```
 #### dspy.ProgramOfThought
 Generates and executes code for reasoning:
 ```python
 pot = dspy.ProgramOfThought("question -> answer")
 result = pot(question="What is 15% of 240?")
 # Generates: answer = 240 * 0.15
 ```
 ### 3. Optimizers
 Optimizers improve your modules automatically using training data:
 #### BootstrapFewShot
 Learns from examples:
 ```python
 from dspy.teleprompt import BootstrapFewShot
 # Training data
 trainset = [
    dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
    dspy.Example(question="What is 3+5?", answer="8").with_inputs("question"),
 ]
 # Define metric
 def validate_answer(example, pred, trace=None):
    return example.answer == pred.answer
 # Optimize
 optimizer = BootstrapFewShot(metric=validate_answer, max_bootstrapped_demos=3)
 optimized_qa = optimizer.compile(qa, trainset=trainset)
 # Now optimized_qa performs better!
 ```
 #### MIPRO (Multiprompt Instruction Proposal Optimizer)
 Iteratively improves prompts:
 ```python
 from dspy.teleprompt import MIPRO
 optimizer = MIPRO(
    metric=validate_answer,
    num_candidates=10,
    init_temperature=1.0
 )
 optimized_cot = optimizer.compile(
    cot,
    trainset=trainset,
    num_trials=100
 )
 ```
 #### BootstrapFinetune
 Creates datasets for model fine-tuning:
 ```python
 from dspy.teleprompt import BootstrapFinetune
 optimizer = BootstrapFinetune(metric=validate_answer)
 optimized_module = optimizer.compile(qa, trainset=trainset)
 # Exports training data for fine-tuning
 ```
 ### 4. Building Complex Systems
 #### Multi-Stage Pipeline
 ```python
 import dspy
 class MultiHopQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=3)
        self.generate_query = dspy.ChainOfThought("question -> search_query")
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        # Stage 1: Generate search query
        search_query = self.generate_query(question=question).search_query
        # Stage 2: Retrieve context
        passages = self.retrieve(search_query).passages
        context = "\n".join(passages)
        # Stage 3: Generate answer
        answer = self.generate_answer(context=context, question=question).answer
        return dspy.Prediction(answer=answer, context=context)
 # Use the pipeline
 qa_system = MultiHopQA()
 result = qa_system(question="Who wrote the book that inspired the movie Blade Runner?")
 ```
 #### RAG System with Optimization
 ```python
 import dspy
 from dspy.retrieve.chromadb_rm import ChromadbRM
 # Configure retriever
 retriever = ChromadbRM(
    collection_name="documents",
    persist_directory="./chroma_db"
 )
 # Register it as the default retrieval model; dspy.Retrieve looks it up
 # from the global settings rather than taking it as an argument.
 dspy.settings.configure(rm=retriever)
 class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        context = self.retrieve(question).passages
        return self.generate(context=context, question=question)
 # Create and optimize
 rag = RAG()
 # Optimize with training data
 from dspy.teleprompt import BootstrapFewShot
 optimizer = BootstrapFewShot(metric=validate_answer)
 optimized_rag = optimizer.compile(rag, trainset=trainset)
 ```
 ## LM Provider Configuration
 ### Anthropic Claude
 ```python
 import dspy
 lm = dspy.Claude(
    model="claude-sonnet-4-5-20250929",
    api_key="your-api-key",  # Or set ANTHROPIC_API_KEY env var
    max_tokens=1000,
    temperature=0.7
 )
 dspy.settings.configure(lm=lm)
 ```
 ### OpenAI
 ```python
 lm = dspy.OpenAI(
    model="gpt-4",
    api_key="your-api-key",
    max_tokens=1000
 )
 dspy.settings.configure(lm=lm)
 ```
 ### Local Models (Ollama)
 ```python
 lm = dspy.OllamaLocal(
    model="llama3.1",
    base_url="http://localhost:11434"
 )
 dspy.settings.configure(lm=lm)
 ```
 ### Multiple Models
 ```python
 # Different models for different tasks
 cheap_lm = dspy.OpenAI(model="gpt-3.5-turbo")
 strong_lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
 # Use cheap model for retrieval, strong model for reasoning
 with dspy.settings.context(lm=cheap_lm):
    context = retriever(question)
 with dspy.settings.context(lm=strong_lm):
    answer = generator(context=context, question=question)
 ```
 ## Common Patterns
 ### Pattern 1: Structured Output
 ```python
 from pydantic import BaseModel, Field
 class PersonInfo(BaseModel):
    name: str = Field(description="Full name")
    age: int = Field(description="Age in years")
    occupation: str = Field(description="Current job")
 class ExtractPerson(dspy.Signature):
    """Extract person information from text."""
    text = dspy.InputField()
    person: PersonInfo = dspy.OutputField()
 extractor = dspy.TypedPredictor(ExtractPerson)
 result = extractor(text="John Doe is a 35-year-old software engineer.")
 print(result.person.name)  # "John Doe"
 print(result.person.age)   # 35
 ```
 ### Pattern 2: Assertion-Driven Optimization
 ```python
 import dspy
 from dspy.primitives.assertions import assert_transform_module, backtrack_handler
 class MathQA(dspy.Module):
    """Solve a math problem and require the solution to parse as a number."""
    def __init__(self):
        super().__init__()
        self.solve = dspy.ChainOfThought("problem -> solution: float")
    @staticmethod
    def _is_number(value):
        """Return True if ``value`` can be converted with float()."""
        try:
            float(value)
        except (TypeError, ValueError):
            return False
        return True
    def forward(self, problem):
        solution = self.solve(problem=problem).solution
        # Assert solution is numeric. The check must evaluate to False (not
        # raise) on bad output so the assertion, not a ValueError, reports it.
        dspy.Assert(
            self._is_number(solution),
            "Solution must be a number",
        )
        return dspy.Prediction(solution=solution)
 # Backtracking/retry is enabled by wrapping the module, not per assertion
 math_qa = assert_transform_module(MathQA(), backtrack_handler)
 ```
 ### Pattern 3: Self-Consistency
 ```python
 import dspy
 from collections import Counter
 class ConsistentQA(dspy.Module):
    def __init__(self, num_samples=5):
        super().__init__()
        self.qa = dspy.ChainOfThought("question -> answer")
        self.num_samples = num_samples
    def forward(self, question):
        # Generate multiple answers
        answers = []
        for _ in range(self.num_samples):
            result = self.qa(question=question)
            answers.append(result.answer)
        # Return most common answer
        most_common = Counter(answers).most_common(1)[0][0]
        return dspy.Prediction(answer=most_common)
 ```
 ### Pattern 4: Retrieval with Reranking
 ```python
 class RerankedRAG(dspy.Module):
    def __init__(self):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=10)
        self.rerank = dspy.Predict("question, passage -> relevance_score: float")
        self.answer = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        # Retrieve candidates
        passages = self.retrieve(question).passages
        # Rerank passages
        scored = []
        for passage in passages:
            score = float(self.rerank(question=question, passage=passage).relevance_score)
            scored.append((score, passage))
        # Take top 3
        top_passages = [p for _, p in sorted(scored, reverse=True)[:3]]
        context = "\n\n".join(top_passages)
        # Generate answer
        return self.answer(context=context, question=question)
 ```
 ## Evaluation and Metrics
 ### Custom Metrics
 ```python
 def exact_match(example, pred, trace=None):
    """Return True when the gold and predicted answers are equal ignoring case.

    ``trace`` is accepted for metric-signature compatibility and is unused.
    """
    gold = example.answer.lower()
    guess = pred.answer.lower()
    return gold == guess
 def f1_score(example, pred, trace=None):
    """Return the F1 overlap between predicted and gold answer tokens.

    Both answers are lower-cased, split on whitespace and compared as sets of
    unique tokens. Returns 0.0 when either side has no tokens or when the two
    sets do not overlap. ``trace`` is accepted for metric-signature
    compatibility and is unused.
    """
    pred_tokens = set(pred.answer.lower().split())
    gold_tokens = set(example.answer.lower().split())
    # An empty gold answer would otherwise divide by zero in the recall term.
    if not pred_tokens or not gold_tokens:
        return 0.0
    overlap = len(pred_tokens & gold_tokens)
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * (precision * recall) / (precision + recall)
 ```
 ### Evaluation
 ```python
 from dspy.evaluate import Evaluate
 # Create evaluator
 evaluator = Evaluate(
    devset=testset,
    metric=exact_match,
    num_threads=4,
    display_progress=True
 )
 # Evaluate model
 score = evaluator(qa_system)
 print(f"Accuracy: {score}")
 # Compare optimized vs unoptimized
 score_before = evaluator(qa)
 score_after = evaluator(optimized_qa)
 print(f"Improvement: {score_after - score_before:.2%}")
 ```
 ## Best Practices
 ### 1. Start Simple, Iterate
 ```python
 # Start with Predict
 qa = dspy.Predict("question -> answer")
 # Add reasoning if needed
 qa = dspy.ChainOfThought("question -> answer")
 # Add optimization when you have data
 optimized_qa = optimizer.compile(qa, trainset=data)
 ```
 ### 2. Use Descriptive Signatures
 ```python
 # ❌ Bad: Vague
 class Task(dspy.Signature):
    input = dspy.InputField()
    output = dspy.OutputField()
 # ✅ Good: Descriptive
 class SummarizeArticle(dspy.Signature):
    """Summarize news articles into 3-5 key points."""
    article = dspy.InputField(desc="full article text")
    summary = dspy.OutputField(desc="bullet points, 3-5 items")
 ```
 ### 3. Optimize with Representative Data
 ```python
 # Create diverse training examples (one per question type you expect)
 trainset = [
    dspy.Example(question="factual", answer="...").with_inputs("question"),
    dspy.Example(question="reasoning", answer="...").with_inputs("question"),
    dspy.Example(question="calculation", answer="...").with_inputs("question"),
 ]
 # Use validation set for metric
 def metric(example, pred, trace=None):
    return example.answer in pred.answer
 ```
 ### 4. Save and Load Optimized Models
 ```python
 # Save
 optimized_qa.save("models/qa_v1.json")
 # Load
 loaded_qa = dspy.ChainOfThought("question -> answer")
 loaded_qa.load("models/qa_v1.json")
 ```
 ### 5. Monitor and Debug
 ```python
 # Enable tracing
 dspy.settings.configure(lm=lm, trace=[])
 # Run prediction
 result = qa(question="...")
 # Inspect trace: each entry is a (module, inputs, outputs) tuple
 for module, inputs, outputs in dspy.settings.trace:
    print(f"Module: {module}")
    print(f"Inputs: {inputs}")
    print(f"Outputs: {outputs}")
 # Print the raw prompt and completion of the most recent LM call
 lm.inspect_history(n=1)
 ```
 ## Comparison to Other Approaches
 | Feature | Manual Prompting | LangChain | DSPy |
 |---------|-----------------|-----------|------|
 | Prompt Engineering | Manual | Manual | Automatic |
 | Optimization | Trial & error | None | Data-driven |
 | Modularity | Low | Medium | High |
 | Type Safety | No | Limited | Yes (Signatures) |
 | Portability | Low | Medium | High |
 | Learning Curve | Low | Medium | Medium-High |
 **When to choose DSPy:**
 - You have training data or can generate it
 - You need systematic prompt improvement
 - You're building complex multi-stage systems
 - You want to optimize across different LMs
 **When to choose alternatives:**
 - Quick prototypes (manual prompting)
 - Simple chains with existing tools (LangChain)
 - Custom optimization logic needed
 ## Resources
 - **Documentation**: https://dspy.ai
 - **GitHub**: https://github.com/stanfordnlp/dspy (22k+ stars)
 - **Discord**: https://discord.gg/XCGy2WDCQB
 - **Twitter**: @DSPyOSS
 - **Paper**: "DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines"
 ## See Also
 - `references/modules.md` - Detailed module guide (Predict, ChainOfThought, ReAct, ProgramOfThought)
 - `references/optimizers.md` - Optimization algorithms (BootstrapFewShot, MIPRO, BootstrapFinetune)
 - `references/examples.md` - Real-world examples (RAG, agents, classifiers)
--- a/skills/mlops/dspy/references/examples.md
+++ b/skills/mlops/dspy/references/examples.md
@ -0,0 +1,663 @@
 # DSPy Real-World Examples
 Practical examples of building production systems with DSPy.
 ## Table of Contents
 - RAG Systems
 - Agent Systems
 - Classification
 - Data Processing
 - Multi-Stage Pipelines
 ## RAG Systems
 ### Basic RAG
 ```python
 import dspy
 class BasicRAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        passages = self.retrieve(question).passages
        context = "\n\n".join(passages)
        return self.generate(context=context, question=question)
 # Configure retriever (example with Chroma)
 from dspy.retrieve.chromadb_rm import ChromadbRM
 retriever = ChromadbRM(
    collection_name="my_docs",
    persist_directory="./chroma_db",
    k=3
 )
 dspy.settings.configure(rm=retriever)
 # Use RAG
 rag = BasicRAG()
 result = rag(question="What is DSPy?")
 print(result.answer)
 ```
 ### Optimized RAG
 ```python
 from dspy.teleprompt import BootstrapFewShot
 # Training data with question-answer pairs
 trainset = [
    dspy.Example(
        question="What is retrieval augmented generation?",
        answer="RAG combines retrieval of relevant documents with generation..."
    ).with_inputs("question"),
    # ... more examples
 ]
 # Define metric
 def answer_correctness(example, pred, trace=None):
    # Check if answer contains key information
    return example.answer.lower() in pred.answer.lower()
 # Optimize RAG
 optimizer = BootstrapFewShot(metric=answer_correctness)
 optimized_rag = optimizer.compile(rag, trainset=trainset)
 # Optimized RAG performs better on similar questions
 result = optimized_rag(question="Explain RAG systems")
 ```
 ### Multi-Hop RAG
 ```python
 class MultiHopRAG(dspy.Module):
    """RAG that follows chains of reasoning across documents."""
    def __init__(self):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=3)
        self.generate_query = dspy.ChainOfThought("question -> search_query")
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        # First retrieval
        query1 = self.generate_query(question=question).search_query
        passages1 = self.retrieve(query1).passages
        # Generate follow-up query based on first results
        context1 = "\n".join(passages1)
        query2 = self.generate_query(
            question=f"Based on: {context1}\nFollow-up: {question}"
        ).search_query
        # Second retrieval
        passages2 = self.retrieve(query2).passages
        # Combine all context
        all_context = "\n\n".join(passages1 + passages2)
        # Generate final answer
        return self.generate_answer(context=all_context, question=question)
 # Use multi-hop RAG
 multi_rag = MultiHopRAG()
 result = multi_rag(question="Who wrote the book that inspired Blade Runner?")
 # Hop 1: Find "Blade Runner was based on..."
 # Hop 2: Find author of that book
 ```
 ### RAG with Reranking
 ```python
 class RerankedRAG(dspy.Module):
    """RAG with learned reranking of retrieved passages."""
    def __init__(self):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=10)  # Get more candidates
        self.rerank = dspy.Predict("question, passage -> relevance_score: float")
        self.answer = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        # Retrieve candidates
        passages = self.retrieve(question).passages
        # Rerank passages
        scored_passages = []
        for passage in passages:
            score = float(self.rerank(
                question=question,
                passage=passage
            ).relevance_score)
            scored_passages.append((score, passage))
        # Take top 3 after reranking
        top_passages = [p for _, p in sorted(scored_passages, reverse=True)[:3]]
        context = "\n\n".join(top_passages)
        # Generate answer from reranked context
        return self.answer(context=context, question=question)
 ```
 ## Agent Systems
 ### ReAct Agent
 ```python
 from dspy.predict import ReAct
 # Define tools
 def search_wikipedia(query: str) -> str:
    """Search Wikipedia for information.

    Returns a three-sentence summary for ``query``, or "No results found"
    when the lookup fails.
    """
    import wikipedia
    try:
        return wikipedia.summary(query, sentences=3)
    except Exception:
        # Best-effort tool: report lookup failures to the agent as text, but
        # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
        return "No results found"
 def calculate(expression: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as text.

    Accepts int/float literals, parentheses, unary +/- and the binary
    operators + - * / // % **. The expression is parsed with ``ast`` and only
    those node types are evaluated, so names, attribute access, calls and
    subscripts are rejected (``eval`` with empty ``__builtins__`` still allows
    them). Returns "Invalid expression" for anything that cannot be evaluated.
    """
    import ast
    import operator
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    def evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        # type() rather than isinstance() so that bool literals are rejected
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported expression")
    try:
        # strip(): leading whitespace is an indentation error in eval mode
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree))
    except (SyntaxError, ValueError, TypeError, AttributeError,
            ArithmeticError, RecursionError):
        return "Invalid expression"
 def search_web(query: str) -> str:
    """Search the web."""
    # Your web search implementation
    return results
 # Create agent signature
 class ResearchAgent(dspy.Signature):
    """Answer questions using available tools."""
    question = dspy.InputField()
    answer = dspy.OutputField()
 # Create ReAct agent
 agent = ReAct(ResearchAgent, tools=[search_wikipedia, calculate, search_web])
 # Agent decides which tools to use
 result = agent(question="What is the population of France divided by 10?")
 # Agent:
 # 1. Thinks: "Need population of France"
 # 2. Acts: search_wikipedia("France population")
 # 3. Thinks: "Got 67 million, need to divide"
 # 4. Acts: calculate("67000000 / 10")
 # 5. Returns: "6,700,000"
 ```
 ### Multi-Agent System
 ```python
 class MultiAgentSystem(dspy.Module):
    """System with specialized agents for different tasks."""
    def __init__(self):
        super().__init__()
        # Router agent
        self.router = dspy.Predict("question -> agent_type: str")
        # Specialized agents
        self.research_agent = ReAct(
            ResearchAgent,
            tools=[search_wikipedia, search_web]
        )
        self.math_agent = dspy.ProgramOfThought("problem -> answer")
        self.reasoning_agent = dspy.ChainOfThought("question -> answer")
    def forward(self, question):
        # Route to appropriate agent
        agent_type = self.router(question=question).agent_type
        if agent_type == "research":
            return self.research_agent(question=question)
        elif agent_type == "math":
            return self.math_agent(problem=question)
        else:
            return self.reasoning_agent(question=question)
 # Use multi-agent system
 mas = MultiAgentSystem()
 result = mas(question="What is 15% of the GDP of France?")
 # The router picks exactly one agent per question; chaining research and math
 # for a question like this would need an explicit second step in forward()
 ```
 ## Classification
 ### Binary Classifier
 ```python
 class SentimentClassifier(dspy.Module):
    def __init__(self):
        super().__init__()
        self.classify = dspy.Predict("text -> sentiment: str")
    def forward(self, text):
        return self.classify(text=text)
 # Training data
 trainset = [
    dspy.Example(text="I love this!", sentiment="positive").with_inputs("text"),
    dspy.Example(text="Terrible experience", sentiment="negative").with_inputs("text"),
    # ... more examples
 ]
 # Optimize
 def accuracy(example, pred, trace=None):
    return example.sentiment == pred.sentiment
 optimizer = BootstrapFewShot(metric=accuracy, max_bootstrapped_demos=5)
 classifier = SentimentClassifier()
 optimized_classifier = optimizer.compile(classifier, trainset=trainset)
 # Use classifier
 result = optimized_classifier(text="This product is amazing!")
 print(result.sentiment)  # "positive"
 ```
 ### Multi-Class Classifier
 ```python
 class TopicClassifier(dspy.Module):
    def __init__(self):
        super().__init__()
        self.classify = dspy.ChainOfThought(
            "text -> category: str, confidence: float"
        )
    def forward(self, text):
        result = self.classify(text=text)
        return dspy.Prediction(
            category=result.category,
            confidence=float(result.confidence)
        )
 # Define categories in signature
 class TopicSignature(dspy.Signature):
    """Classify text into one of: technology, sports, politics, entertainment."""
    text = dspy.InputField()
    category = dspy.OutputField(desc="one of: technology, sports, politics, entertainment")
    confidence = dspy.OutputField(desc="0.0 to 1.0")
 classifier = dspy.ChainOfThought(TopicSignature)
 result = classifier(text="The Lakers won the championship")
 print(result.category)  # "sports"
 print(result.confidence)  # 0.95
 ```
 ### Hierarchical Classifier
 ```python
 class HierarchicalClassifier(dspy.Module):
    """Two-stage classification: coarse then fine-grained."""
    def __init__(self):
        super().__init__()
        self.coarse = dspy.Predict("text -> broad_category: str")
        self.fine_tech = dspy.Predict("text -> tech_subcategory: str")
        self.fine_sports = dspy.Predict("text -> sports_subcategory: str")
    def forward(self, text):
        # Stage 1: Broad category
        broad = self.coarse(text=text).broad_category
        # Stage 2: Fine-grained based on broad
        if broad == "technology":
            fine = self.fine_tech(text=text).tech_subcategory
        elif broad == "sports":
            fine = self.fine_sports(text=text).sports_subcategory
        else:
            fine = "other"
        return dspy.Prediction(broad_category=broad, fine_category=fine)
 ```
 ## Data Processing
 ### Text Summarization
 ```python
 class AdaptiveSummarizer(dspy.Module):
    """Summarizes text to target length."""
    def __init__(self):
        super().__init__()
        self.summarize = dspy.ChainOfThought("text, target_length -> summary")
    def forward(self, text, target_length="3 sentences"):
        return self.summarize(text=text, target_length=target_length)
 # Use summarizer
 summarizer = AdaptiveSummarizer()
 long_text = "..." # Long article
 short_summary = summarizer(long_text, target_length="1 sentence")
 medium_summary = summarizer(long_text, target_length="3 sentences")
 detailed_summary = summarizer(long_text, target_length="1 paragraph")
 ```
 ### Information Extraction
 ```python
 from pydantic import BaseModel, Field
 class PersonInfo(BaseModel):
    name: str = Field(description="Full name")
    age: int = Field(description="Age in years")
    occupation: str = Field(description="Job title")
    location: str = Field(description="City and country")
 class ExtractPerson(dspy.Signature):
    """Extract person information from text."""
    text = dspy.InputField()
    person: PersonInfo = dspy.OutputField()
 extractor = dspy.TypedPredictor(ExtractPerson)
 text = "Dr. Jane Smith, 42, is a neuroscientist at Stanford University in Palo Alto, California."
 result = extractor(text=text)
 print(result.person.name)       # "Dr. Jane Smith"
 print(result.person.age)        # 42
 print(result.person.occupation) # "neuroscientist"
 print(result.person.location)   # "Palo Alto, California"
 ```
 ### Batch Processing
 ```python
 class BatchProcessor(dspy.Module):
    """Process large datasets efficiently."""
    def __init__(self):
        super().__init__()
        self.process = dspy.Predict("text -> processed_text")
    def forward(self, texts):
        # Batch processing for efficiency
        return self.process.batch([{"text": t} for t in texts])
 # Process 1000 documents
 processor = BatchProcessor()
 results = processor(texts=large_dataset)
 # Results are returned in order
 for original, result in zip(large_dataset, results):
    print(f"{original} -> {result.processed_text}")
 ```
 ## Multi-Stage Pipelines
 ### Document Processing Pipeline
 ```python
 class DocumentPipeline(dspy.Module):
    """Multi-stage document processing."""
    def __init__(self):
        super().__init__()
        self.extract = dspy.Predict("document -> key_points")
        self.classify = dspy.Predict("key_points -> category")
        self.summarize = dspy.ChainOfThought("key_points, category -> summary")
        self.tag = dspy.Predict("summary -> tags")
    def forward(self, document):
        # Stage 1: Extract key points
        key_points = self.extract(document=document).key_points
        # Stage 2: Classify
        category = self.classify(key_points=key_points).category
        # Stage 3: Summarize
        summary = self.summarize(
            key_points=key_points,
            category=category
        ).summary
        # Stage 4: Generate tags
        tags = self.tag(summary=summary).tags
        return dspy.Prediction(
            key_points=key_points,
            category=category,
            summary=summary,
            tags=tags
        )
 ```
 ### Quality Control Pipeline
 ```python
 class QualityControlPipeline(dspy.Module):
    """Generate output, then verify and improve it until it passes.
    The draft is checked up to ``max_iterations`` times; every failed check
    feeds the reported issues into an improvement step.
    """
    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought("prompt -> output")
        self.verify = dspy.Predict("output -> is_valid: bool, issues: str")
        self.improve = dspy.ChainOfThought("output, issues -> improved_output")
    def forward(self, prompt, max_iterations=3):
        """Return a Prediction with the final ``output`` and ``iterations`` used.
        ``iterations`` is the 1-based number of the verification pass that
        accepted the output, or ``max_iterations`` if none accepted it.
        """
        output = self.generate(prompt=prompt).output
        # Use a named, 1-based counter instead of reading the throwaway name `_`
        for attempt in range(1, max_iterations + 1):
            # Verify output
            verification = self.verify(output=output)
            if verification.is_valid:
                return dspy.Prediction(output=output, iterations=attempt)
            # Improve based on issues
            output = self.improve(
                output=output,
                issues=verification.issues
            ).improved_output
        # Budget exhausted: the last improved draft is returned without a final check
        return dspy.Prediction(output=output, iterations=max_iterations)
 ```
 ## Production Tips
 ### 1. Caching for Performance
 ```python
 from functools import lru_cache
 class CachedRAG(dspy.Module):
    """RAG module that memoizes answers per question on each instance.
    ``functools.lru_cache`` on ``forward`` would key on ``self`` and keep every
    instance alive for the lifetime of the cache, so the cache is stored on the
    instance instead.
    Args:
        cache_size: Maximum number of answers to keep (0 disables caching).
    """
    def __init__(self, cache_size=1000):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=3)
        self.generate = dspy.ChainOfThought("context, question -> answer")
        self.cache_size = cache_size
        # question -> answer; owned by this instance and released with it
        self._answer_cache = {}
    def forward(self, question):
        """Return the answer string for ``question``, reusing a cached one if present."""
        if question in self._answer_cache:
            return self._answer_cache[question]
        passages = self.retrieve(question).passages
        context = "\n".join(passages)
        answer = self.generate(context=context, question=question).answer
        if self.cache_size > 0:
            if len(self._answer_cache) >= self.cache_size:
                # dicts preserve insertion order, so this evicts the oldest entry
                self._answer_cache.pop(next(iter(self._answer_cache)))
            self._answer_cache[question] = answer
        return answer
 ```
 ### 2. Error Handling
 ```python
 class RobustModule(dspy.Module):
    def __init__(self):
        super().__init__()
        self.process = dspy.ChainOfThought("input -> output")
    def forward(self, input):
        try:
            result = self.process(input=input)
            return result
        except Exception as e:
            # Log error
            print(f"Error processing {input}: {e}")
            # Return fallback
            return dspy.Prediction(output="Error: could not process input")
 ```
 ### 3. Monitoring
 ```python
 class MonitoredModule(dspy.Module):
    def __init__(self):
        super().__init__()
        self.process = dspy.ChainOfThought("input -> output")
        self.call_count = 0
        self.errors = 0
    def forward(self, input):
        self.call_count += 1
        try:
            result = self.process(input=input)
            return result
        except Exception as e:
            self.errors += 1
            raise
    def get_stats(self):
        return {
            "calls": self.call_count,
            "errors": self.errors,
            "error_rate": self.errors / max(self.call_count, 1)
        }
 ```
 ### 4. A/B Testing
 ```python
 class ABTestModule(dspy.Module):
    """Run two variants and compare."""
    def __init__(self, variant_a, variant_b):
        super().__init__()
        self.variant_a = variant_a
        self.variant_b = variant_b
        self.a_calls = 0
        self.b_calls = 0
    def forward(self, input, variant="a"):
        if variant == "a":
            self.a_calls += 1
            return self.variant_a(input=input)
        else:
            self.b_calls += 1
            return self.variant_b(input=input)
 # Compare two optimizers
 baseline = dspy.ChainOfThought("question -> answer")
 optimized = BootstrapFewShot(...).compile(baseline, trainset=trainset)
 ab_test = ABTestModule(variant_a=baseline, variant_b=optimized)
 # Route 50% to each
 import random
 variant = "a" if random.random() < 0.5 else "b"
 result = ab_test(input=question, variant=variant)
 ```
 ## Complete Example: Customer Support Bot
 ```python
 import dspy
 from dspy.teleprompt import BootstrapFewShot
 class CustomerSupportBot(dspy.Module):
    """Complete customer support system."""
    def __init__(self):
        super().__init__()
        # Classify intent
        self.classify_intent = dspy.Predict("message -> intent: str")
        # Specialized handlers
        self.technical_handler = dspy.ChainOfThought("message, history -> response")
        self.billing_handler = dspy.ChainOfThought("message, history -> response")
        self.general_handler = dspy.Predict("message, history -> response")
        # Retrieve relevant docs
        self.retrieve = dspy.Retrieve(k=3)
        # Conversation history
        self.history = []
    def forward(self, message):
        # Classify intent
        intent = self.classify_intent(message=message).intent
        # Retrieve relevant documentation
        docs = self.retrieve(message).passages
        context = "\n".join(docs)
        # Add context to history
        history_str = "\n".join(self.history)
        full_message = f"Context: {context}\n\nMessage: {message}"
        # Route to appropriate handler
        if intent == "technical":
            response = self.technical_handler(
                message=full_message,
                history=history_str
            ).response
        elif intent == "billing":
            response = self.billing_handler(
                message=full_message,
                history=history_str
            ).response
        else:
            response = self.general_handler(
                message=full_message,
                history=history_str
            ).response
        # Update history
        self.history.append(f"User: {message}")
        self.history.append(f"Bot: {response}")
        return dspy.Prediction(response=response, intent=intent)
 # Training data
 trainset = [
    dspy.Example(
        message="My account isn't working",
        intent="technical",
        response="I'd be happy to help. What error are you seeing?"
    ).with_inputs("message"),
    # ... more examples
 ]
 # Define metric
 def response_quality(example, pred, trace=None):
    # Check if response is helpful
    if len(pred.response) < 20:
        return 0.0
    if example.intent != pred.intent:
        return 0.3
    return 1.0
 # Optimize
 optimizer = BootstrapFewShot(metric=response_quality)
 bot = CustomerSupportBot()
 optimized_bot = optimizer.compile(bot, trainset=trainset)
 # Use in production
 optimized_bot.save("models/support_bot_v1.json")
 # Later, load and use
 loaded_bot = CustomerSupportBot()
 loaded_bot.load("models/support_bot_v1.json")
 response = loaded_bot(message="I can't log in")
 ```
 ## Resources
 - **Documentation**: https://dspy.ai
 - **Examples Repo**: https://github.com/stanfordnlp/dspy/tree/main/examples
 - **Discord**: https://discord.gg/XCGy2WDCQB
--- a/skills/mlops/dspy/references/modules.md
+++ b/skills/mlops/dspy/references/modules.md
@ -0,0 +1,475 @@
 # DSPy Modules
 Complete guide to DSPy's built-in modules for language model programming.
 ## Module Basics
 DSPy modules are composable building blocks inspired by PyTorch's NN modules:
 - Have learnable parameters (prompts, few-shot examples)
 - Can be composed using Python control flow
 - Generalized to handle any signature
 - Optimizable with DSPy optimizers
 ### Base Module Pattern
 ```python
 import dspy
 class CustomModule(dspy.Module):
    def __init__(self):
        super().__init__()
        # Initialize sub-modules
        self.predictor = dspy.Predict("input -> output")
    def forward(self, input):
        # Module logic
        result = self.predictor(input=input)
        return result
 ```
 ## Core Modules
 ### dspy.Predict
 **Basic prediction module** - Makes LM calls without reasoning steps.
 ```python
 # Inline signature
 qa = dspy.Predict("question -> answer")
 result = qa(question="What is 2+2?")
 # Class signature
 class QA(dspy.Signature):
    """Answer questions concisely."""
    question = dspy.InputField()
    answer = dspy.OutputField(desc="short, factual answer")
 qa = dspy.Predict(QA)
 result = qa(question="What is the capital of France?")
 print(result.answer)  # "Paris"
 ```
 **When to use:**
 - Simple, direct predictions
 - No reasoning steps needed
 - Fast responses required
 ### dspy.ChainOfThought
 **Step-by-step reasoning** - Generates rationale before answer.
 **Parameters:**
 - `signature`: Task signature
 - `rationale_field`: Custom reasoning field (optional)
 - `rationale_field_type`: Type for rationale (default: `str`)
 ```python
 # Basic usage
 cot = dspy.ChainOfThought("question -> answer")
 result = cot(question="If I have 5 apples and give away 2, how many remain?")
 print(result.rationale)  # "Let's think step by step..."
 print(result.answer)     # "3"
 # Custom rationale field
 cot = dspy.ChainOfThought(
    signature="problem -> solution",
    rationale_field=dspy.OutputField(
        prefix="Reasoning: Let's break this down step by step to"
    )
 )
 ```
 **When to use:**
 - Complex reasoning tasks
 - Math word problems
 - Logical deduction
 - Quality > speed
 **Performance:**
 - ~2x slower than Predict
 - Significantly better accuracy on reasoning tasks
 ### dspy.ProgramOfThought
 **Code-based reasoning** - Generates and executes Python code.
 ```python
 pot = dspy.ProgramOfThought("question -> answer")
 result = pot(question="What is 15% of 240?")
 # Internally generates: answer = 240 * 0.15
 # Executes code and returns result
 print(result.answer)  # 36.0
 result = pot(question="If a train travels 60 mph for 2.5 hours, how far does it go?")
 # Generates: distance = 60 * 2.5
 print(result.answer)  # 150.0
 ```
 **When to use:**
 - Arithmetic calculations
 - Symbolic math
 - Data transformations
 - Deterministic computations
 **Benefits:**
 - More reliable than text-based math
 - Handles complex calculations
 - Transparent (shows generated code)
 ### dspy.ReAct
 **Reasoning + Acting** - Agent that uses tools iteratively.
 ```python
 from dspy.predict import ReAct
 # Define tools
 def search_wikipedia(query: str) -> str:
    """Search Wikipedia for information."""
    # Your search implementation
    return search_results
 def calculate(expression: str) -> float:
    """Evaluate a basic arithmetic expression without using eval().
    The expression is chosen by the language model, so it is untrusted input:
    eval() would let it run arbitrary Python. Only numeric literals, unary
    +/- and the operators + - * / // % ** are accepted.
    Raises:
        ValueError: If the expression contains anything else (names, calls,
            attribute access, ...) or an exponent larger than 1000.
        SyntaxError: If the expression is not valid Python syntax.
    """
    import ast
    import operator
    def _power(base, exponent):
        # Bound the exponent so inputs like 9**9**9 cannot hang the agent
        if abs(exponent) > 1000:
            raise ValueError(f"Exponent too large in expression: {expression!r}")
        return operator.pow(base, exponent)
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: _power,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    def _evaluate(node):
        # bool is a subclass of int, so compare the exact type to reject True/False
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](_evaluate(node.left), _evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        raise ValueError(f"Unsupported expression: {expression!r}")
    return _evaluate(ast.parse(expression, mode="eval").body)
 # Create ReAct agent
 class ResearchQA(dspy.Signature):
    """Answer questions using available tools."""
    question = dspy.InputField()
    answer = dspy.OutputField()
 react = ReAct(ResearchQA, tools=[search_wikipedia, calculate])
 # Agent decides which tools to use
 result = react(question="How old was Einstein when he published special relativity?")
 # Internally:
 # 1. Thinks: "Need birth year and publication year"
 # 2. Acts: search_wikipedia("Albert Einstein")
 # 3. Acts: search_wikipedia("Special relativity 1905")
 # 4. Acts: calculate("1905 - 1879")
 # 5. Returns: "26 years old"
 ```
 **When to use:**
 - Multi-step research tasks
 - Tool-using agents
 - Complex information retrieval
 - Tasks requiring multiple API calls
 **Best practices:**
 - Keep tool descriptions clear and specific
 - Limit to 5-7 tools (too many = confusion)
 - Provide tool usage examples in docstrings
 ### dspy.MultiChainComparison
 **Generate multiple outputs and compare** - Self-consistency pattern.
 ```python
 mcc = dspy.MultiChainComparison("question -> answer", M=5)
 result = mcc(question="What is the capital of France?")
 # Generates 5 candidate answers
 # Compares and selects most consistent
 print(result.answer)  # "Paris"
 print(result.candidates)  # All 5 generated answers
 ```
 **Parameters:**
 - `M`: Number of candidates to generate (default: 5)
 - `temperature`: Sampling temperature for diversity
 **When to use:**
 - High-stakes decisions
 - Ambiguous questions
 - When single answer may be unreliable
 **Tradeoff:**
 - Roughly M times the LM calls and cost (one per candidate), plus a final comparison step
 - Higher accuracy on ambiguous tasks
 ### dspy.majority
 **Majority voting over multiple predictions.**
 ```python
 import dspy
 # Generate multiple predictions
 predictor = dspy.Predict("question -> answer")
 predictions = [predictor(question="What is 2+2?") for _ in range(5)]
 # Take majority vote: dspy.majority takes the predictions themselves (not raw
 # strings) and returns a Prediction holding the most common value of `field`
 result = dspy.majority(predictions, field="answer")
 print(result.answer)  # "4"
 ```
 **When to use:**
 - Combining multiple model outputs
 - Reducing variance in predictions
 - Ensemble approaches
 ## Advanced Modules
 ### dspy.TypedPredictor
 **Structured output with Pydantic models.**
 ```python
 from pydantic import BaseModel, Field
 class PersonInfo(BaseModel):
    name: str = Field(description="Full name")
    age: int = Field(description="Age in years")
    occupation: str = Field(description="Current job")
 class ExtractPerson(dspy.Signature):
    """Extract person information from text."""
    text = dspy.InputField()
    person: PersonInfo = dspy.OutputField()
 extractor = dspy.TypedPredictor(ExtractPerson)
 result = extractor(text="John Doe is a 35-year-old software engineer.")
 print(result.person.name)       # "John Doe"
 print(result.person.age)        # 35
 print(result.person.occupation) # "software engineer"
 ```
 **Benefits:**
 - Type safety
 - Automatic validation
 - JSON schema generation
 - IDE autocomplete
 ### dspy.Retry
 **Automatic retry with validation.**
 ```python
 from dspy.primitives import Retry
 def validate_number(example, pred, trace=None):
    """Validate output is a number."""
    try:
        float(pred.answer)
        return True
    except ValueError:
        return False
 # Retry up to 3 times if validation fails
 qa = Retry(
    dspy.ChainOfThought("question -> answer"),
    validate=validate_number,
    max_retries=3
 )
 result = qa(question="What is 15% of 80?")
 # If first attempt returns non-numeric, retries automatically
 ```
 ### dspy.Assert
 **Assertion-driven optimization.**
 ```python
 import dspy
 from dspy.primitives.assertions import assert_transform_module, backtrack_handler
 class ValidatedQA(dspy.Module):
    """QA module that asserts its answer can be parsed as a number."""
    def __init__(self):
        super().__init__()
        self.qa = dspy.ChainOfThought("question -> answer: float")
    def forward(self, question):
        """Answer ``question`` and assert that the answer is numeric."""
        answer = self.qa(question=question).answer
        # float() raises on non-numeric text instead of returning a falsy value,
        # so turn the parse attempt into a boolean that dspy.Assert can check
        try:
            float(answer)
            is_numeric = True
        except (TypeError, ValueError):
            is_numeric = False
        # Assert answer is numeric
        dspy.Assert(
            is_numeric,
            "Answer must be a number",
            backtrack=backtrack_handler
        )
        return dspy.Prediction(answer=answer)
 **Benefits:**
 - Catches errors during optimization
 - Guides LM toward valid outputs
 - Better than post-hoc filtering
 ## Module Composition
 ### Sequential Pipeline
 ```python
 class Pipeline(dspy.Module):
    def __init__(self):
        super().__init__()
        self.stage1 = dspy.Predict("input -> intermediate")
        self.stage2 = dspy.ChainOfThought("intermediate -> output")
    def forward(self, input):
        intermediate = self.stage1(input=input).intermediate
        output = self.stage2(intermediate=intermediate).output
        return dspy.Prediction(output=output)
 ```
 ### Conditional Logic
 ```python
 class ConditionalModule(dspy.Module):
    def __init__(self):
        super().__init__()
        self.router = dspy.Predict("question -> category: str")
        self.simple_qa = dspy.Predict("question -> answer")
        self.complex_qa = dspy.ChainOfThought("question -> answer")
    def forward(self, question):
        category = self.router(question=question).category
        if category == "simple":
            return self.simple_qa(question=question)
        else:
            return self.complex_qa(question=question)
 ```
 ### Parallel Execution
 ```python
 class ParallelModule(dspy.Module):
    def __init__(self):
        super().__init__()
        self.approach1 = dspy.ChainOfThought("question -> answer")
        self.approach2 = dspy.ProgramOfThought("question -> answer")
    def forward(self, question):
        # Run both approaches
        answer1 = self.approach1(question=question).answer
        answer2 = self.approach2(question=question).answer
        # Compare or combine results
        if answer1 == answer2:
            return dspy.Prediction(answer=answer1, confidence="high")
        else:
            return dspy.Prediction(answer=answer1, confidence="low")
 ```
 ## Batch Processing
 All modules support batch processing for efficiency:
 ```python
 cot = dspy.ChainOfThought("question -> answer")
 questions = [
    "What is 2+2?",
    "What is 3+3?",
    "What is 4+4?"
 ]
 # Process all at once
 results = cot.batch([{"question": q} for q in questions])
 for result in results:
    print(result.answer)
 ```
 ## Saving and Loading
 ```python
 # Save module
 qa = dspy.ChainOfThought("question -> answer")
 qa.save("models/qa_v1.json")
 # Load module
 loaded_qa = dspy.ChainOfThought("question -> answer")
 loaded_qa.load("models/qa_v1.json")
 ```
 **What gets saved:**
 - Few-shot examples
 - Prompt instructions
 - Module configuration
 **What doesn't get saved:**
 - Model weights (DSPy doesn't fine-tune by default)
 - LM provider configuration
 ## Module Selection Guide
 | Task | Module | Reason |
 |------|--------|--------|
 | Simple classification | Predict | Fast, direct |
 | Math word problems | ProgramOfThought | Reliable calculations |
 | Logical reasoning | ChainOfThought | Better with steps |
 | Multi-step research | ReAct | Tool usage |
 | High-stakes decisions | MultiChainComparison | Self-consistency |
 | Structured extraction | TypedPredictor | Type safety |
 | Ambiguous questions | MultiChainComparison | Multiple perspectives |
 ## Performance Tips
 1. **Start with Predict**, add reasoning only if needed
 2. **Use batch processing** for multiple inputs
 3. **Cache predictions** for repeated queries
 4. **Profile token usage** with `track_usage=True`
 5. **Optimize after prototyping** with teleprompters
 ## Common Patterns
 ### Pattern: Retrieval + Generation
 ```python
 class RAG(dspy.Module):
    def __init__(self, k=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=k)
        self.generate = dspy.ChainOfThought("context, question -> answer")
    def forward(self, question):
        context = self.retrieve(question).passages
        return self.generate(context=context, question=question)
 ```
 ### Pattern: Verification Loop
 ```python
 class VerifiedQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.answer = dspy.ChainOfThought("question -> answer")
        self.verify = dspy.Predict("question, answer -> is_correct: bool")
    def forward(self, question, max_attempts=3):
        for _ in range(max_attempts):
            answer = self.answer(question=question).answer
            is_correct = self.verify(question=question, answer=answer).is_correct
            if is_correct:
                return dspy.Prediction(answer=answer)
        return dspy.Prediction(answer="Unable to verify answer")
 ```
 ### Pattern: Multi-Turn Dialog
 ```python
 class DialogAgent(dspy.Module):
    def __init__(self):
        super().__init__()
        self.respond = dspy.Predict("history, user_message -> assistant_message")
        self.history = []
    def forward(self, user_message):
        history_str = "\n".join(self.history)
        response = self.respond(history=history_str, user_message=user_message)
        self.history.append(f"User: {user_message}")
        self.history.append(f"Assistant: {response.assistant_message}")
        return response
 ```
--- a/skills/mlops/dspy/references/optimizers.md
+++ b/skills/mlops/dspy/references/optimizers.md
@ -0,0 +1,566 @@
 # DSPy Optimizers (Teleprompters)
 Complete guide to DSPy's optimization algorithms for improving prompts and model weights.
 ## What are Optimizers?
 DSPy optimizers (formerly called "teleprompters") automatically improve your modules by:
 - **Synthesizing few-shot examples** from training data
 - **Proposing better instructions** through search
 - **Fine-tuning model weights** (optional)
 **Key idea**: Instead of manually tuning prompts, define a metric and let DSPy optimize.
 ## Optimizer Selection Guide
 | Optimizer | Best For | Speed | Quality | Data Needed |
 |-----------|----------|-------|---------|-------------|
 | BootstrapFewShot | General purpose | Fast | Good | 10-50 examples |
 | MIPRO | Instruction tuning | Medium | Excellent | 50-200 examples |
 | BootstrapFinetune | Fine-tuning | Slow | Excellent | 100+ examples |
 | COPRO | Prompt optimization | Medium | Good | 20-100 examples |
 | KNNFewShot | Quick baseline | Very fast | Fair | 10+ examples |
 ## Core Optimizers
 ### BootstrapFewShot
 **Most popular optimizer** - Generates few-shot demonstrations from training data.
 **How it works:**
 1. Takes your training examples
 2. Uses your module to generate predictions
 3. Selects high-quality predictions (based on metric)
 4. Uses these as few-shot examples in future prompts
 **Parameters:**
 - `metric`: Function that scores predictions (required)
 - `max_bootstrapped_demos`: Max demonstrations to generate (default: 4)
 - `max_labeled_demos`: Max labeled examples to use (default: 16)
 - `max_rounds`: Optimization iterations (default: 1)
 - `metric_threshold`: Minimum score to accept (optional)
 ```python
 import dspy
 from dspy.teleprompt import BootstrapFewShot
 # Define metric
 def validate_answer(example, pred, trace=None):
    """Return True if prediction matches gold answer."""
    return example.answer.lower() == pred.answer.lower()
 # Training data
 trainset = [
    dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
    dspy.Example(question="What is 3+5?", answer="8").with_inputs("question"),
    dspy.Example(question="What is 10-3?", answer="7").with_inputs("question"),
 ]
 # Create module
 qa = dspy.ChainOfThought("question -> answer")
 # Optimize
 optimizer = BootstrapFewShot(
    metric=validate_answer,
    max_bootstrapped_demos=3,
    max_rounds=2
 )
 optimized_qa = optimizer.compile(qa, trainset=trainset)
 # Now optimized_qa has learned few-shot examples!
 result = optimized_qa(question="What is 5+7?")
 ```
 **Best practices:**
 - Start with 10-50 training examples
 - Use diverse examples covering edge cases
 - Set `max_bootstrapped_demos=3-5` for most tasks
 - Increase `max_rounds=2-3` for better quality
 **When to use:**
 - First optimizer to try
 - You have 10+ labeled examples
 - Want quick improvements
 - General-purpose tasks
 ### MIPRO (Multiprompt Instruction Proposal Optimizer)
 **State-of-the-art optimizer** - Iteratively searches for better instructions.
 **How it works:**
 1. Generates candidate instructions
 2. Tests each on validation set
 3. Selects best-performing instructions
 4. Iterates to refine further
 **Parameters:**
 - `metric`: Evaluation metric (required)
 - `num_candidates`: Instructions to try per iteration (default: 10)
 - `init_temperature`: Sampling temperature (default: 1.0)
 - `verbose`: Show progress (default: False)
 ```python
 from dspy.teleprompt import MIPRO
 # Define metric with more nuance
 def answer_quality(example, pred, trace=None):
    """Score answer quality 0-1."""
    if example.answer.lower() in pred.answer.lower():
        return 1.0
    # Partial credit for similar answers
    return 0.5 if len(set(example.answer.split()) & set(pred.answer.split())) > 0 else 0.0
 # Larger training set (MIPRO benefits from more data)
 trainset = [...]  # 50-200 examples
 valset = [...]    # 20-50 examples
 # Create module
 qa = dspy.ChainOfThought("question -> answer")
 # Optimize with MIPRO
 optimizer = MIPRO(
    metric=answer_quality,
    num_candidates=10,
    init_temperature=1.0,
    verbose=True
 )
 optimized_qa = optimizer.compile(
    student=qa,
    trainset=trainset,
    valset=valset,  # MIPRO uses separate validation set
    num_trials=100   # More trials = better quality
 )
 ```
 **Best practices:**
 - Use 50-200 training examples
 - Separate validation set (20-50 examples)
 - Run 100-200 trials for best results
 - Takes 10-30 minutes typically
 **When to use:**
 - You have 50+ labeled examples
 - Want state-of-the-art performance
 - Willing to wait for optimization
 - Complex reasoning tasks
 ### BootstrapFinetune
 **Fine-tune model weights** - Creates training dataset for fine-tuning.
 **How it works:**
 1. Generates synthetic training data
 2. Exports data in fine-tuning format
 3. You fine-tune model separately
 4. Load fine-tuned model back
 **Parameters:**
 - `metric`: Evaluation metric (required)
 - `max_bootstrapped_demos`: Demonstrations to generate (default: 4)
 - `max_rounds`: Data generation rounds (default: 1)
 ```python
 from dspy.teleprompt import BootstrapFinetune
 # Training data
 trainset = [...]  # 100+ examples recommended
 # Define metric
 def validate(example, pred, trace=None):
    return example.answer == pred.answer
 # Create module
 qa = dspy.ChainOfThought("question -> answer")
 # Generate fine-tuning data
 optimizer = BootstrapFinetune(metric=validate)
 optimized_qa = optimizer.compile(qa, trainset=trainset)
 # Exports training data to file
 # You then fine-tune using your LM provider's API
 # After fine-tuning, load your model:
 finetuned_lm = dspy.OpenAI(model="ft:gpt-3.5-turbo:your-model-id")
 dspy.settings.configure(lm=finetuned_lm)
 ```
 **Best practices:**
 - Use 100+ training examples
 - Validate on held-out test set
 - Monitor for overfitting
 - Compare with prompt-based methods first
 **When to use:**
 - You have 100+ examples
 - Latency is critical (fine-tuned models are typically faster)
 - Task is narrow and well-defined
 - Prompt optimization isn't enough
 ### COPRO (Coordinate Prompt Optimization)
 **Optimize prompts via gradient-free search.**
 **How it works:**
 1. Generates prompt variants
 2. Evaluates each variant
 3. Selects best prompts
 4. Iterates to refine
 ```python
 from dspy.teleprompt import COPRO
 # Training data
 trainset = [...]
 # Define metric
 def metric(example, pred, trace=None):
    return example.answer == pred.answer
 # Create module
 qa = dspy.ChainOfThought("question -> answer")
 # Optimize with COPRO
 optimizer = COPRO(
    metric=metric,
    breadth=10,  # Candidates per iteration
    depth=3      # Optimization rounds
 )
 optimized_qa = optimizer.compile(qa, trainset=trainset)
 ```
 **When to use:**
 - Want prompt optimization
 - Have 20-100 examples
 - MIPRO is too slow for your use case
 ### KNNFewShot
 **Simple k-nearest neighbors** - Selects similar examples for each query.
 **How it works:**
 1. Embeds all training examples
 2. For each query, finds k most similar examples
 3. Uses these as few-shot demonstrations
 ```python
 from dspy.teleprompt import KNNFewShot
 trainset = [...]
 # No metric needed - just selects similar examples
 optimizer = KNNFewShot(k=3)
 optimized_qa = optimizer.compile(qa, trainset=trainset)
 # For each query, uses 3 most similar examples from trainset
 ```
 **When to use:**
 - Quick baseline
 - Have diverse training examples
 - Similarity is a good proxy for helpfulness
 ## Writing Metrics
 Metrics are functions that score predictions. They're critical for optimization.
 ### Binary Metrics
 ```python
 def exact_match(example, pred, trace=None):
    """Return True if prediction exactly matches gold."""
    return example.answer == pred.answer
 def contains_answer(example, pred, trace=None):
    """Return True if prediction contains gold answer."""
    return example.answer.lower() in pred.answer.lower()
 ```
 ### Continuous Metrics
 ```python
 def f1_score(example, pred, trace=None):
    """Token-level F1 between the predicted and gold answers.
    Both answers are lower-cased and split on whitespace into token sets.
    Returns 0.0 when the sets share no token, which also covers an empty
    prediction or an empty gold answer.
    """
    pred_tokens = set(pred.answer.lower().split())
    gold_tokens = set(example.answer.lower().split())
    overlap = len(pred_tokens & gold_tokens)
    # No shared tokens means precision and recall are both 0; returning early
    # also avoids dividing by zero when either token set is empty
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * (precision * recall) / (precision + recall)
 def semantic_similarity(example, pred, trace=None):
    """Cosine similarity between embeddings of the gold and predicted answers.
    Requires the third-party ``sentence-transformers`` package.
    """
    from sentence_transformers import SentenceTransformer, util
    # Loading the model is expensive and a metric runs once per example, so
    # keep a single instance on the function object and reuse it
    model = getattr(semantic_similarity, "_model", None)
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        semantic_similarity._model = model
    emb1 = model.encode(example.answer)
    emb2 = model.encode(pred.answer)
    # cos_sim returns a 1x1 tensor for two single embeddings; unwrap to a float
    similarity = util.cos_sim(emb1, emb2).item()
    return similarity
 ```
 ### Multi-Factor Metrics
 ```python
 def comprehensive_metric(example, pred, trace=None):
    """Combine multiple factors."""
    score = 0.0
    # Correctness (50%)
    if example.answer.lower() in pred.answer.lower():
        score += 0.5
    # Conciseness (25%)
    if len(pred.answer.split()) <= 20:
        score += 0.25
    # Citation (25%)
    if "source:" in pred.answer.lower():
        score += 0.25
    return score
 ```
 ### Using Trace for Debugging
 ```python
 def metric_with_trace(example, pred, trace=None):
    """Metric that uses trace for debugging."""
    is_correct = example.answer == pred.answer
    if trace is not None and not is_correct:
        # Log failures for analysis
        print(f"Failed on: {example.question}")
        print(f"Expected: {example.answer}")
        print(f"Got: {pred.answer}")
    return is_correct
 ```
 ## Evaluation Best Practices
 ### Train/Val/Test Split
 ```python
 # Split data
 trainset = data[:100]   # 70%
 valset = data[100:120]  # 15%
 testset = data[120:]    # 15%
 # Optimize on train
 optimized = optimizer.compile(module, trainset=trainset)
 # Validate during optimization (for MIPRO)
 optimized = optimizer.compile(module, trainset=trainset, valset=valset)
 # Evaluate on test
 from dspy.evaluate import Evaluate
 evaluator = Evaluate(devset=testset, metric=metric)
 score = evaluator(optimized)
 ```
 ### Cross-Validation
 ```python
 from sklearn.model_selection import KFold
 kfold = KFold(n_splits=5)
 scores = []
 for train_idx, val_idx in kfold.split(data):
    trainset = [data[i] for i in train_idx]
    valset = [data[i] for i in val_idx]
    optimized = optimizer.compile(module, trainset=trainset)
    score = evaluator(optimized, devset=valset)
    scores.append(score)
 print(f"Average score: {sum(scores) / len(scores):.2f}")
 ```
 ### Comparing Optimizers
 ```python
 results = {}
 for opt_name, optimizer in [
    ("baseline", None),
    ("fewshot", BootstrapFewShot(metric=metric)),
    ("mipro", MIPRO(metric=metric)),
 ]:
    if optimizer is None:
        module_opt = module
    else:
        module_opt = optimizer.compile(module, trainset=trainset)
    score = evaluator(module_opt, devset=testset)
    results[opt_name] = score
 print(results)
 # {'baseline': 0.65, 'fewshot': 0.78, 'mipro': 0.85}
 ```
 ## Advanced Patterns
 ### Custom Optimizer
 ```python
 from dspy.teleprompt import Teleprompter
 class CustomOptimizer(Teleprompter):
    def __init__(self, metric):
        self.metric = metric
    def compile(self, student, trainset, **kwargs):
        # Your optimization logic here
        # Return optimized student module
        return student
 ```
 ### Multi-Stage Optimization
 ```python
 # Stage 1: Bootstrap few-shot
 stage1 = BootstrapFewShot(metric=metric, max_bootstrapped_demos=3)
 optimized1 = stage1.compile(module, trainset=trainset)
 # Stage 2: Instruction tuning
 stage2 = MIPRO(metric=metric, num_candidates=10)
 optimized2 = stage2.compile(optimized1, trainset=trainset, valset=valset)
 # Final optimized module
 final_module = optimized2
 ```
 ### Ensemble Optimization
 ```python
 class EnsembleModule(dspy.Module):
    def __init__(self, modules):
        super().__init__()
        self.modules = modules
    def forward(self, question):
        predictions = [m(question=question).answer for m in self.modules]
        # Vote or average
        return dspy.Prediction(answer=max(set(predictions), key=predictions.count))
 # Optimize multiple modules
 opt1 = BootstrapFewShot(metric=metric).compile(module, trainset=trainset)
 opt2 = MIPRO(metric=metric).compile(module, trainset=trainset)
 opt3 = COPRO(metric=metric).compile(module, trainset=trainset)
 # Ensemble
 ensemble = EnsembleModule([opt1, opt2, opt3])
 ```
 ## Optimization Workflow
 ### 1. Start with Baseline
 ```python
 # No optimization
 baseline = dspy.ChainOfThought("question -> answer")
 baseline_score = evaluator(baseline, devset=testset)
 print(f"Baseline: {baseline_score}")
 ```
 ### 2. Try BootstrapFewShot
 ```python
 # Quick optimization
 fewshot = BootstrapFewShot(metric=metric, max_bootstrapped_demos=3)
 optimized = fewshot.compile(baseline, trainset=trainset)
 fewshot_score = evaluator(optimized, devset=testset)
 print(f"Few-shot: {fewshot_score} (+{fewshot_score - baseline_score:.2f})")
 ```
 ### 3. If More Data Available, Try MIPRO
 ```python
 # State-of-the-art optimization
 mipro = MIPRO(metric=metric, num_candidates=10)
 optimized_mipro = mipro.compile(baseline, trainset=trainset, valset=valset)
 mipro_score = evaluator(optimized_mipro, devset=testset)
 print(f"MIPRO: {mipro_score} (+{mipro_score - baseline_score:.2f})")
 ```
 ### 4. Save Best Model
 ```python
 if mipro_score > fewshot_score:
    optimized_mipro.save("models/best_model.json")
 else:
    optimized.save("models/best_model.json")
 ```
 ## Common Pitfalls
 ### 1. Overfitting to Training Data
 ```python
 # ❌ Bad: Too many demos
 optimizer = BootstrapFewShot(max_bootstrapped_demos=20)  # Overfits!
 # ✅ Good: Moderate demos
 optimizer = BootstrapFewShot(max_bootstrapped_demos=4)  # 3-5 demos is usually enough
 ```
 ### 2. Metric Doesn't Match Task
 ```python
 # ❌ Bad: Binary metric for nuanced task
 def bad_metric(example, pred, trace=None):
    return example.answer == pred.answer  # Too strict!
 # ✅ Good: Graded metric
 def good_metric(example, pred, trace=None):
    return f1_score(example, pred)  # Allows partial credit
 ```
 ### 3. Insufficient Training Data
 ```python
 # ❌ Bad: Too little data
 trainset = data[:5]  # Not enough!
 # ✅ Good: Sufficient data
 trainset = data[:50]  # Better
 ```
 ### 4. No Validation Set
 ```python
 # ❌ Bad: Optimizing on test set
 optimizer.compile(module, trainset=testset)  # Cheating!
 # ✅ Good: Proper splits
 optimizer.compile(module, trainset=trainset, valset=valset)
 evaluator(optimized, devset=testset)
 ```
 ## Performance Tips
 1. **Start simple**: BootstrapFewShot first
 2. **Use representative data**: Cover edge cases
 3. **Monitor overfitting**: Validate on held-out set
 4. **Iterate metrics**: Refine based on failures
 5. **Save checkpoints**: Don't lose progress
 6. **Compare to baseline**: Measure improvement
 7. **Test multiple optimizers**: Find best fit
 ## Resources
 - **Paper**: "DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines"
 - **GitHub**: https://github.com/stanfordnlp/dspy
 - **Discord**: https://discord.gg/XCGy2WDCQB
--- a/skills/mlops/faiss/SKILL.md
+++ b/skills/mlops/faiss/SKILL.md
@ -0,0 +1,221 @@
 ---
 name: faiss
 description: Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or when you need pure similarity search without metadata. Best for high-performance applications.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [RAG, FAISS, Similarity Search, Vector Search, Facebook AI, GPU Acceleration, Billion-Scale, K-NN, HNSW, High Performance, Large Scale]
 dependencies: [faiss-cpu, faiss-gpu, numpy]
 ---
 # FAISS - Efficient Similarity Search
 Facebook AI's library for billion-scale vector similarity search.
 ## When to use FAISS
 **Use FAISS when:**
 - Need fast similarity search on large vector datasets (millions/billions)
 - GPU acceleration required
 - Pure vector similarity (no metadata filtering needed)
 - High throughput, low latency critical
 - Offline/batch processing of embeddings
 **Metrics**:
 - **31,700+ GitHub stars**
 - Meta/Facebook AI Research
 - **Handles billions of vectors**
 - **C++** with Python bindings
 **Use alternatives instead**:
 - **Chroma/Pinecone**: Need metadata filtering
 - **Weaviate**: Need full database features
 - **Annoy**: Simpler, fewer features
 ## Quick start
 ### Installation
 ```bash
 # CPU only
 pip install faiss-cpu
 # GPU support
 pip install faiss-gpu
 ```
 ### Basic usage
 ```python
 import faiss
 import numpy as np
 # Create sample data (1000 vectors, 128 dimensions)
 d = 128
 nb = 1000
 vectors = np.random.random((nb, d)).astype('float32')
 # Create index
 index = faiss.IndexFlatL2(d)  # L2 distance
 index.add(vectors)             # Add vectors
 # Search
 k = 5  # Find 5 nearest neighbors
 query = np.random.random((1, d)).astype('float32')
 distances, indices = index.search(query, k)
 print(f"Nearest neighbors: {indices}")
 print(f"Distances: {distances}")
 ```
 ## Index types
 ### 1. Flat (exact search)
 ```python
 # L2 (Euclidean) distance
 index = faiss.IndexFlatL2(d)
 # Inner product (cosine similarity if normalized)
 index = faiss.IndexFlatIP(d)
 # Slowest, most accurate
 ```
 ### 2. IVF (inverted file) - Fast approximate
 ```python
 # Create quantizer
 quantizer = faiss.IndexFlatL2(d)
 # IVF index with 100 clusters
 nlist = 100
 index = faiss.IndexIVFFlat(quantizer, d, nlist)
 # Train on data
 index.train(vectors)
 # Add vectors
 index.add(vectors)
 # Search (nprobe = clusters to search)
 index.nprobe = 10
 distances, indices = index.search(query, k)
 ```
 ### 3. HNSW (Hierarchical NSW) - Best quality/speed
 ```python
 # HNSW index
 M = 32  # Number of connections per layer
 index = faiss.IndexHNSWFlat(d, M)
 # No training needed
 index.add(vectors)
 # Search
 distances, indices = index.search(query, k)
 ```
 ### 4. Product Quantization - Memory efficient
 ```python
 # PQ stores m bytes per vector instead of 4*d bytes (64× smaller for d=128, m=8)
 m = 8   # Number of subquantizers
 nbits = 8
 index = faiss.IndexPQ(d, m, nbits)
 # Train and add
 index.train(vectors)
 index.add(vectors)
 ```
 ## Save and load
 ```python
 # Save index
 faiss.write_index(index, "large.index")
 # Load index
 index = faiss.read_index("large.index")
 # Continue using
 distances, indices = index.search(query, k)
 ```
 ## GPU acceleration
 ```python
 # Single GPU
 res = faiss.StandardGpuResources()
 index_cpu = faiss.IndexFlatL2(d)
 index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu)  # GPU 0
 # Multi-GPU
 index_gpu = faiss.index_cpu_to_all_gpus(index_cpu)
 # 10-100× faster than CPU
 ```
 ## LangChain integration
 ```python
 from langchain_community.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings
 # Create FAISS vector store
 vectorstore = FAISS.from_documents(docs, OpenAIEmbeddings())
 # Save
 vectorstore.save_local("faiss_index")
 # Load
 vectorstore = FAISS.load_local(
    "faiss_index",
    OpenAIEmbeddings(),
    allow_dangerous_deserialization=True
 )
 # Search
 results = vectorstore.similarity_search("query", k=5)
 ```
 ## LlamaIndex integration
 ```python
 from llama_index.vector_stores.faiss import FaissVectorStore
 import faiss
 # Create FAISS index
 d = 1536
 faiss_index = faiss.IndexFlatL2(d)
 vector_store = FaissVectorStore(faiss_index=faiss_index)
 ```
 ## Best practices
 1. **Choose right index type** - Flat for <10K, IVF for 10K-1M, HNSW for quality
 2. **Normalize for cosine** - Use IndexFlatIP with normalized vectors
 3. **Use GPU for large datasets** - 10-100× faster
 4. **Save trained indices** - Training is expensive
 5. **Tune nprobe/efSearch** - Balance speed/accuracy
 6. **Monitor memory** - PQ for large datasets
 7. **Batch queries** - Better GPU utilization
 ## Performance
 | Index Type | Build Time | Search Time | Memory | Accuracy |
 |------------|------------|-------------|--------|----------|
 | Flat | Fast | Slow | High | 100% |
 | IVF | Medium | Fast | Medium | 95-99% |
 | HNSW | Slow | Fastest | High | 99% |
 | PQ | Medium | Fast | Low | 90-95% |
 ## Resources
 - **GitHub**: https://github.com/facebookresearch/faiss ⭐ 31,700+
 - **Wiki**: https://github.com/facebookresearch/faiss/wiki
 - **License**: MIT
--- a/skills/mlops/faiss/references/index_types.md
+++ b/skills/mlops/faiss/references/index_types.md
@ -0,0 +1,280 @@
 # FAISS Index Types Guide
 Complete guide to choosing and using FAISS index types.
 ## Index selection guide
 | Dataset Size | Index Type | Training | Accuracy | Speed |
 |--------------|------------|----------|----------|-------|
 | < 10K | Flat | No | 100% | Slow |
 | 10K-1M | IVF | Yes | 95-99% | Fast |
 | 1M-10M | HNSW | No | 99% | Fastest |
 | > 10M | IVF+PQ | Yes | 90-95% | Fast, low memory |
 ## Flat indices (exact search)
 ### IndexFlatL2 - L2 (Euclidean) distance
 ```python
 import faiss
 import numpy as np
 d = 128  # Dimension
 index = faiss.IndexFlatL2(d)
 # Add vectors
 vectors = np.random.random((1000, d)).astype('float32')
 index.add(vectors)
 # Search
 k = 5
 query = np.random.random((1, d)).astype('float32')
 distances, indices = index.search(query, k)
 ```
 **Use when:**
 - Dataset < 10,000 vectors
 - Need 100% accuracy
 - Serving as baseline
 ### IndexFlatIP - Inner product (cosine similarity)
 ```python
 # For cosine similarity, normalize vectors first
 import faiss
 d = 128
 index = faiss.IndexFlatIP(d)
 # Normalize vectors (required for cosine similarity)
 faiss.normalize_L2(vectors)
 index.add(vectors)
 # Search
 faiss.normalize_L2(query)
 distances, indices = index.search(query, k)
 ```
 **Use when:**
 - Need cosine similarity
 - Recommendation systems
 - Text embeddings
 ## IVF indices (inverted file)
 ### IndexIVFFlat - Cluster-based search
 ```python
 # Create quantizer
 quantizer = faiss.IndexFlatL2(d)
 # Create IVF index with 100 clusters
 nlist = 100  # Number of clusters
 index = faiss.IndexIVFFlat(quantizer, d, nlist)
 # Train on data (required!)
 index.train(vectors)
 # Add vectors
 index.add(vectors)
 # Search (nprobe = clusters to search)
 index.nprobe = 10  # Search 10 closest clusters
 distances, indices = index.search(query, k)
 ```
 **Parameters:**
 - `nlist`: Number of clusters (√N to 4√N recommended)
 - `nprobe`: Clusters to search (1-nlist, higher = more accurate)
 **Use when:**
 - Dataset 10K-1M vectors
 - Need fast approximate search
 - Can afford training time
 ### Tuning nprobe
 ```python
 # Test different nprobe values
 for nprobe in [1, 5, 10, 20, 50]:
    index.nprobe = nprobe
    distances, indices = index.search(query, k)
    # Measure recall/speed trade-off
 ```
 **Guidelines:**
 - `nprobe=1`: Fastest, ~50% recall
 - `nprobe=10`: Good balance, ~95% recall
 - `nprobe=nlist`: Exact search (same as Flat)
 ## HNSW indices (graph-based)
 ### IndexHNSWFlat - Hierarchical NSW
 ```python
 # HNSW index
 M = 32  # Number of connections per layer (16-64)
 index = faiss.IndexHNSWFlat(d, M)
 # Optional: Set ef_construction (build time parameter)
 index.hnsw.efConstruction = 40  # Higher = better quality, slower build
 # Add vectors (no training needed!)
 index.add(vectors)
 # Search
 index.hnsw.efSearch = 16  # Search time parameter
 distances, indices = index.search(query, k)
 ```
 **Parameters:**
 - `M`: Connections per layer (16-64, default 32)
 - `efConstruction`: Build quality (40-200, higher = better)
 - `efSearch`: Search quality (16-512, higher = more accurate)
 **Use when:**
 - Need best quality approximate search
 - Can afford higher memory (more connections)
 - Dataset 1M-10M vectors
 ## PQ indices (product quantization)
 ### IndexPQ - Memory-efficient
 ```python
 # PQ stores m bytes per vector instead of 4*d bytes (64× smaller for d=128, m=8)
 m = 8   # Number of subquantizers (divides d)
 nbits = 8  # Bits per subquantizer
 index = faiss.IndexPQ(d, m, nbits)
 # Train (required!)
 index.train(vectors)
 # Add vectors
 index.add(vectors)
 # Search
 distances, indices = index.search(query, k)
 ```
 **Parameters:**
 - `m`: Subquantizers (d must be divisible by m)
 - `nbits`: Bits per code (8 or 16)
 **Memory savings:**
 - Original: d × 4 bytes (float32)
 - PQ: m bytes
 - Compression ratio: 4d/m
 **Use when:**
 - Limited memory
 - Large datasets (> 10M vectors)
 - Can accept ~90-95% accuracy
 ### IndexIVFPQ - IVF + PQ combined
 ```python
 # Best for very large datasets
 nlist = 4096
 m = 8
 nbits = 8
 quantizer = faiss.IndexFlatL2(d)
 index = faiss.IndexIVFPQ(quantizer, d, nlist, m, nbits)
 # Train
 index.train(vectors)
 index.add(vectors)
 # Search
 index.nprobe = 32
 distances, indices = index.search(query, k)
 ```
 **Use when:**
 - Dataset > 10M vectors
 - Need fast search + low memory
 - Can accept 90-95% accuracy
 ## GPU indices
 ### Single GPU
 ```python
 import faiss
 # Create CPU index
 index_cpu = faiss.IndexFlatL2(d)
 # Move to GPU
 res = faiss.StandardGpuResources()  # GPU resources
 index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu)  # GPU 0
 # Use normally
 index_gpu.add(vectors)
 distances, indices = index_gpu.search(query, k)
 ```
 ### Multi-GPU
 ```python
 # Use all available GPUs
 index_gpu = faiss.index_cpu_to_all_gpus(index_cpu)
 # Or specific GPUs
 gpus = [0, 1, 2, 3]  # Use GPUs 0-3
 index_gpu = faiss.index_cpu_to_gpus_list(index_cpu, gpus=gpus)
 ```
 **Speedup:**
 - Single GPU: 10-50× faster than CPU
 - Multi-GPU: Near-linear scaling
 ## Index factory
 ```python
 # Easy index creation with string descriptors
 index = faiss.index_factory(d, "IVF100,Flat")
 index = faiss.index_factory(d, "HNSW32")
 index = faiss.index_factory(d, "IVF4096,PQ8")
 # Train and use
 index.train(vectors)
 index.add(vectors)
 ```
 **Common descriptors:**
 - `"Flat"`: Exact search
 - `"IVF100,Flat"`: IVF with 100 clusters
 - `"HNSW32"`: HNSW with M=32
 - `"IVF4096,PQ8"`: IVF + PQ compression
 ## Performance comparison
 ### Search speed (1M vectors, k=10)
 | Index | Build Time | Search Time | Memory | Recall |
 |-------|------------|-------------|--------|--------|
 | Flat | 0s | 50ms | 512 MB | 100% |
 | IVF100 | 5s | 2ms | 512 MB | 95% |
 | HNSW32 | 60s | 1ms | 1GB | 99% |
 | IVF4096+PQ8 | 30s | 3ms | 32 MB | 90% |
 *CPU (16 cores), 128-dim vectors*
 ## Best practices
 1. **Start with Flat** - Baseline for comparison
 2. **Use IVF for medium datasets** - Good balance
 3. **Use HNSW for best quality** - If memory allows
 4. **Add PQ for memory savings** - Large datasets
 5. **GPU for > 100K vectors** - 10-50× speedup
 6. **Tune nprobe/efSearch** - Trade-off speed/accuracy
 7. **Train on representative data** - Better clustering
 8. **Save trained indices** - Avoid retraining
 ## Resources
 - **Wiki**: https://github.com/facebookresearch/faiss/wiki
 - **Paper**: https://arxiv.org/abs/1702.08734
--- a/skills/mlops/flash-attention/SKILL.md
+++ b/skills/mlops/flash-attention/SKILL.md
@ -0,0 +1,367 @@
 ---
 name: optimizing-attention-flash
 description: Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (>512 tokens), encountering GPU memory issues with attention, or need faster inference. Supports PyTorch native SDPA, flash-attn library, H100 FP8, and sliding window attention.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Optimization, Flash Attention, Attention Optimization, Memory Efficiency, Speed Optimization, Long Context, PyTorch, SDPA, H100, FP8, Transformers]
 dependencies: [flash-attn, torch, transformers]
 ---
 # Flash Attention - Fast Memory-Efficient Attention
 ## Quick start
 Flash Attention provides 2-4x speedup and 10-20x memory reduction for transformer attention through IO-aware tiling and recomputation.
 **PyTorch native (easiest, PyTorch 2.2+)**:
 ```python
 import torch
 import torch.nn.functional as F
 q = torch.randn(2, 8, 512, 64, device='cuda', dtype=torch.float16)  # [batch, heads, seq, dim]
 k = torch.randn(2, 8, 512, 64, device='cuda', dtype=torch.float16)
 v = torch.randn(2, 8, 512, 64, device='cuda', dtype=torch.float16)
 # Automatically uses Flash Attention if available
 out = F.scaled_dot_product_attention(q, k, v)
 ```
 **flash-attn library (more features)**:
 ```bash
 pip install flash-attn --no-build-isolation
 ```
 ```python
 from flash_attn import flash_attn_func
 # q, k, v: [batch, seqlen, nheads, headdim]
 out = flash_attn_func(q, k, v, dropout_p=0.0, causal=True)
 ```
 ## Common workflows
 ### Workflow 1: Enable in existing PyTorch model
 Copy this checklist:
 ```
 Flash Attention Integration:
 - [ ] Step 1: Check PyTorch version (≥2.2)
 - [ ] Step 2: Enable Flash Attention backend
 - [ ] Step 3: Verify speedup with profiling
 - [ ] Step 4: Test accuracy matches baseline
 ```
 **Step 1: Check PyTorch version**
 ```bash
 python -c "import torch; print(torch.__version__)"
 # Should be ≥2.2.0
 ```
 If <2.2, upgrade:
 ```bash
 pip install --upgrade torch
 ```
 **Step 2: Enable Flash Attention backend**
 Replace standard attention:
 ```python
 # Before (standard attention)
 attn_weights = torch.softmax(q @ k.transpose(-2, -1) / math.sqrt(d_k), dim=-1)
 out = attn_weights @ v
 # After (Flash Attention)
 import torch.nn.functional as F
 out = F.scaled_dot_product_attention(q, k, v, attn_mask=mask)
 ```
 Force Flash Attention backend:
 ```python
 with torch.backends.cuda.sdp_kernel(
    enable_flash=True,
    enable_math=False,
    enable_mem_efficient=False
 ):
    out = F.scaled_dot_product_attention(q, k, v)
 ```
 **Step 3: Verify speedup with profiling**
 ```python
 import torch.utils.benchmark as benchmark
 def test_attention(use_flash):
    q, k, v = [torch.randn(2, 8, 2048, 64, device='cuda', dtype=torch.float16) for _ in range(3)]
    if use_flash:
        with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False):
            return F.scaled_dot_product_attention(q, k, v)
    else:
        attn = (q @ k.transpose(-2, -1) / 8.0).softmax(dim=-1)
        return attn @ v
 # Benchmark
 t_flash = benchmark.Timer(stmt='test_attention(True)', globals=globals())
 t_standard = benchmark.Timer(stmt='test_attention(False)', globals=globals())
 print(f"Flash: {t_flash.timeit(100).mean:.3f}s")
 print(f"Standard: {t_standard.timeit(100).mean:.3f}s")
 ```
 Expected: 2-4x speedup for sequences >512 tokens.
 **Step 4: Test accuracy matches baseline**
 ```python
 # Compare outputs
 q, k, v = [torch.randn(1, 8, 512, 64, device='cuda', dtype=torch.float16) for _ in range(3)]
 # Flash Attention
 out_flash = F.scaled_dot_product_attention(q, k, v)
 # Standard attention
 attn_weights = torch.softmax(q @ k.transpose(-2, -1) / 8.0, dim=-1)
 out_standard = attn_weights @ v
 # Check difference
 diff = (out_flash - out_standard).abs().max()
 print(f"Max difference: {diff:.6f}")
 # Should be <1e-3 for float16
 ```
 ### Workflow 2: Use flash-attn library for advanced features
 For multi-query attention, sliding window, or H100 FP8.
 Copy this checklist:
 ```
 flash-attn Library Setup:
 - [ ] Step 1: Install flash-attn library
 - [ ] Step 2: Modify attention code
 - [ ] Step 3: Enable advanced features
 - [ ] Step 4: Benchmark performance
 ```
 **Step 1: Install flash-attn library**
 ```bash
 # NVIDIA GPUs (CUDA 12.0+)
 pip install flash-attn --no-build-isolation
 # Verify installation
 python -c "from flash_attn import flash_attn_func; print('Success')"
 ```
 **Step 2: Modify attention code**
 ```python
 from flash_attn import flash_attn_func
 # Input: [batch_size, seq_len, num_heads, head_dim]
 # Transpose from [batch, heads, seq, dim] if needed
 q = q.transpose(1, 2)  # [batch, seq, heads, dim]
 k = k.transpose(1, 2)
 v = v.transpose(1, 2)
 out = flash_attn_func(
    q, k, v,
    dropout_p=0.1,
    causal=True,  # For autoregressive models
    window_size=(-1, -1),  # No sliding window
    softmax_scale=None  # Auto-scale
 )
 out = out.transpose(1, 2)  # Back to [batch, heads, seq, dim]
 ```
 **Step 3: Enable advanced features**
 Multi-query attention (shared K/V across heads):
 ```python
 from flash_attn import flash_attn_func
 # q: [batch, seq, num_q_heads, dim]
 # k, v: [batch, seq, num_kv_heads, dim]  # Fewer KV heads
 out = flash_attn_func(q, k, v)  # Automatically handles MQA
 ```
 Sliding window attention (local attention):
 ```python
 # Each token attends only to the previous 256 tokens (with causal=True the right window is ignored)
 out = flash_attn_func(
    q, k, v,
    window_size=(256, 256),  # (left, right) window
    causal=True
 )
 ```
 **Step 4: Benchmark performance**
 ```python
 import torch
 from flash_attn import flash_attn_func
 import time
 q, k, v = [torch.randn(4, 4096, 32, 64, device='cuda', dtype=torch.float16) for _ in range(3)]
 # Warmup
 for _ in range(10):
    _ = flash_attn_func(q, k, v)
 # Benchmark
 torch.cuda.synchronize()
 start = time.time()
 for _ in range(100):
    out = flash_attn_func(q, k, v)
    torch.cuda.synchronize()
 end = time.time()
 print(f"Time per iteration: {(end-start)/100*1000:.2f}ms")
 print(f"Memory allocated: {torch.cuda.max_memory_allocated()/1e9:.2f}GB")
 ```
 ### Workflow 3: H100 FP8 optimization (FlashAttention-3)
 For maximum performance on H100 GPUs.
 ```
 FP8 Setup:
 - [ ] Step 1: Verify H100 GPU available
 - [ ] Step 2: Install flash-attn with FP8 support
 - [ ] Step 3: Convert inputs to FP8
 - [ ] Step 4: Run with FP8 attention
 ```
 **Step 1: Verify H100 GPU**
 ```bash
 nvidia-smi --query-gpu=name --format=csv
 # Should show "H100" or "H800"
 ```
 **Step 2: Install flash-attn with FP8 support**
 ```bash
 pip install flash-attn --no-build-isolation
 # FP8 support included for H100
 ```
 **Step 3: Convert inputs to FP8**
 ```python
 import torch
 q = torch.randn(2, 4096, 32, 64, device='cuda', dtype=torch.float16)
 k = torch.randn(2, 4096, 32, 64, device='cuda', dtype=torch.float16)
 v = torch.randn(2, 4096, 32, 64, device='cuda', dtype=torch.float16)
 # Convert to float8_e4m3 (FP8)
 q_fp8 = q.to(torch.float8_e4m3fn)
 k_fp8 = k.to(torch.float8_e4m3fn)
 v_fp8 = v.to(torch.float8_e4m3fn)
 ```
 **Step 4: Run with FP8 attention**
 ```python
 from flash_attn import flash_attn_func
 # FlashAttention-3 automatically uses FP8 kernels on H100
 out = flash_attn_func(q_fp8, k_fp8, v_fp8)
 # Result: ~1.2 PFLOPS, 1.5-2x faster than FP16
 ```
 ## When to use vs alternatives
 **Use Flash Attention when:**
 - Training transformers with sequences >512 tokens
 - Running inference with long context (>2K tokens)
 - GPU memory constrained (OOM with standard attention)
 - Need 2-4x speedup without accuracy loss
 - Using PyTorch 2.2+ or can install flash-attn
 **Use alternatives instead:**
 - **Standard attention**: Sequences <256 tokens (overhead not worth it)
 - **xFormers**: Need more attention variants (not just speed)
 - **Memory-efficient attention**: CPU inference (Flash Attention needs GPU)
 ## Common issues
 **Issue: ImportError: cannot import flash_attn**
 Install with no-build-isolation flag:
 ```bash
 pip install flash-attn --no-build-isolation
 ```
 Or install CUDA toolkit first:
 ```bash
 conda install cuda -c nvidia
 pip install flash-attn --no-build-isolation
 ```
 **Issue: Slower than expected (no speedup)**
 Flash Attention benefits increase with sequence length:
 - <512 tokens: Minimal speedup (10-20%)
 - 512-2K tokens: 2-3x speedup
 - >2K tokens: 3-4x speedup
 Check sequence length is sufficient.
 **Issue: RuntimeError: CUDA error**
 Verify GPU supports Flash Attention:
 ```python
 import torch
 print(torch.cuda.get_device_capability())
 # Should be ≥(7, 5) for Turing+
 ```
 Flash Attention requires:
 - Ampere (A100, A10): ✅ Full support
 - Turing (T4): ⚠️ FlashAttention 1.x only (FlashAttention-2 requires Ampere or newer)
 - Volta (V100): ❌ Not supported
 **Issue: Accuracy degradation**
 Check dtype is float16 or bfloat16 (not float32):
 ```python
 q = q.to(torch.float16)  # Or torch.bfloat16
 ```
 Flash Attention uses float16/bfloat16 for speed. Float32 not supported.
 ## Advanced topics
 **Integration with HuggingFace Transformers**: See [references/transformers-integration.md](references/transformers-integration.md) for enabling Flash Attention in BERT, GPT, Llama models.
 **Performance benchmarks**: See [references/benchmarks.md](references/benchmarks.md) for detailed speed and memory comparisons across GPUs and sequence lengths.
 **Algorithm details**: See [references/algorithm.md](references/algorithm.md) for tiling strategy, recomputation, and IO complexity analysis.
 **Advanced features**: See [references/advanced-features.md](references/advanced-features.md) for rotary embeddings, ALiBi, paged KV cache, and custom attention masks.
 ## Hardware requirements
 - **GPU**: NVIDIA Ampere+ (A100, A10, A30) or AMD MI200+
 - **VRAM**: Same as standard attention (Flash Attention doesn't increase memory)
 - **CUDA**: 12.0+ recommended (11.8 minimum)
 - **PyTorch**: 2.2+ for native support
 **Not supported**: V100 (Volta), CPU inference
 ## Resources
 - Paper: "FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness" (NeurIPS 2022)
 - Paper: "FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning" (ICLR 2024)
 - Blog: https://tridao.me/blog/2024/flash3/
 - GitHub: https://github.com/Dao-AILab/flash-attention
 - PyTorch docs: https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
--- a/skills/mlops/flash-attention/references/benchmarks.md
+++ b/skills/mlops/flash-attention/references/benchmarks.md
@ -0,0 +1,215 @@
 # Performance Benchmarks
 ## Contents
 - Speed comparisons across GPUs
 - Memory usage analysis
 - Scaling with sequence length
 - Training vs inference performance
 - Flash Attention versions comparison
 ## Speed comparisons across GPUs
 ### A100 80GB (Ampere)
 **Forward pass time** (milliseconds, batch=8, heads=32, dim=64):
 | Seq Length | Standard | Flash Attn 2 | Flash Attn 3 | Speedup (FA2) |
 |------------|----------|--------------|--------------|---------------|
 | 512 | 1.2 | 0.9 | N/A | 1.3x |
 | 1024 | 3.8 | 1.4 | N/A | 2.7x |
 | 2048 | 14.2 | 4.8 | N/A | 3.0x |
 | 4096 | 55.1 | 17.3 | N/A | 3.2x |
 | 8192 | 218.5 | 66.2 | N/A | 3.3x |
 ### H100 80GB (Hopper)
 **Forward pass time** (milliseconds, same config):
 | Seq Length | Standard | Flash Attn 2 | Flash Attn 3 (FP16) | Flash Attn 3 (FP8) | Best Speedup |
 |------------|----------|--------------|---------------------|--------------------|--------------|
 | 512 | 0.8 | 0.6 | 0.4 | 0.3 | 2.7x |
 | 1024 | 2.6 | 1.0 | 0.6 | 0.4 | 6.5x |
 | 2048 | 9.8 | 3.4 | 2.0 | 1.3 | 7.5x |
 | 4096 | 38.2 | 12.5 | 7.2 | 4.8 | 8.0x |
 | 8192 | 151.4 | 47.8 | 27.1 | 18.2 | 8.3x |
 **Key insight**: Flash Attention 3 on H100 reaches ~740 TFLOPS with FP16 (~75% of theoretical max) and close to 1.2 PFLOPS with FP8.
 ### A10G 24GB (Ampere)
 **Forward pass time** (milliseconds, batch=4):
 | Seq Length | Standard | Flash Attn 2 | Speedup |
 |------------|----------|--------------|---------|
 | 512 | 2.1 | 1.6 | 1.3x |
 | 1024 | 6.8 | 2.8 | 2.4x |
 | 2048 | 25.9 | 9.4 | 2.8x |
 | 4096 | 102.1 | 35.2 | 2.9x |
 ## Memory usage analysis
 ### GPU memory consumption (batch=8, heads=32, dim=64)
 **Standard attention memory**:
 | Seq Length | Attention Matrix | KV Cache | Total | Notes |
 |------------|------------------|----------|-------|-------|
 | 512 | 8 MB | 32 MB | 40 MB | Manageable |
 | 2048 | 128 MB | 128 MB | 256 MB | Growing |
 | 8192 | 2048 MB (2 GB) | 512 MB | 2.5 GB | Large |
 | 32768 | 32768 MB (32 GB) | 2048 MB | 34 GB | OOM on 24GB GPUs |
 **Flash Attention 2 memory**:
 | Seq Length | Attention (on-chip) | KV Cache | Total | Reduction |
 |------------|---------------------|----------|-------|-----------|
 | 512 | 0 MB (recomputed) | 32 MB | 32 MB | 20% |
 | 2048 | 0 MB | 128 MB | 128 MB | 50% |
 | 8192 | 0 MB | 512 MB | 512 MB | 80% |
 | 32768 | 0 MB | 2048 MB | 2 GB | 94% |
 **Key insight**: Flash Attention doesn't materialize attention matrix, saving O(N²) memory.
 ### Memory scaling comparison
 **Llama 2 7B model memory** (float16, batch=1):
 | Context Length | Standard Attention | Flash Attention 2 | Can Fit 24GB GPU? |
 |----------------|-------------------|-------------------|-------------------|
 | 2K | 3.2 GB | 2.1 GB | Both: Yes |
 | 4K | 5.8 GB | 2.8 GB | Both: Yes |
 | 8K | 12.1 GB | 4.2 GB | Both: Yes |
 | 16K | 26.3 GB (OOM) | 7.8 GB | Only Flash: Yes |
 | 32K | OOM | 14.2 GB | Only Flash: Yes |
 ### Training memory (Llama 2 7B, batch=4)
 | Context | Standard (GB) | Flash Attn (GB) | Reduction |
 |---------|---------------|-----------------|-----------|
 | 2K | 18.2 | 12.4 | 32% |
 | 4K | 34.8 | 16.8 | 52% |
 | 8K | OOM (>40GB) | 26.2 | Fits! |
 ## Scaling with sequence length
 ### Computational complexity
 **Standard attention**:
 - Time: O(N² × d)
 - Memory: O(N² + N × d)
 **Flash Attention**:
 - Time: O(N² × d) (same, but with better constants)
 - Memory: O(N × d) (linear!)
 ### Empirical scaling (A100, batch=1, heads=32, dim=64)
 **Time per token (milliseconds)**:
 | Sequence | 512 | 1K | 2K | 4K | 8K | 16K |
 |----------|-----|-----|-----|-----|-----|------|
 | Standard | 0.15 | 0.37 | 1.11 | 3.44 | 13.4 | 52.8 |
 | Flash Attn 2 | 0.11 | 0.14 | 0.24 | 0.43 | 0.83 | 1.64 |
 | Speedup | 1.4x | 2.6x | 4.6x | 8.0x | 16.1x | 32.2x |
 **Observation**: Speedup grows roughly linearly with sequence length (at longer sequences it doubles each time the length doubles)!
 ### Memory per token (MB)
 | Sequence | 512 | 1K | 2K | 4K | 8K | 16K |
 |----------|-----|-----|-----|-----|-----|------|
 | Standard | 0.08 | 0.13 | 0.25 | 0.64 | 2.05 | 8.13 |
 | Flash Attn 2 | 0.06 | 0.06 | 0.06 | 0.06 | 0.06 | 0.06 |
 **Observation**: Flash Attention memory per token is constant!
 ## Training vs inference performance
 ### Training (forward + backward, Llama 2 7B, A100)
 | Batch × Seq | Standard (samples/sec) | Flash Attn (samples/sec) | Speedup |
 |-------------|------------------------|--------------------------|---------|
 | 4 × 2K | 1.2 | 3.1 | 2.6x |
 | 8 × 2K | 2.1 | 5.8 | 2.8x |
 | 4 × 4K | 0.4 | 1.3 | 3.3x |
 | 8 × 4K | OOM | 2.4 | Enabled |
 | 2 × 8K | 0.1 | 0.4 | 4.0x |
 ### Inference (generation, Llama 2 7B, A100)
 | Context Length | Standard (tokens/sec) | Flash Attn (tokens/sec) | Speedup |
 |----------------|----------------------|-------------------------|---------|
 | 512 | 48 | 52 | 1.1x |
 | 2K | 42 | 62 | 1.5x |
 | 4K | 31 | 58 | 1.9x |
 | 8K | 18 | 51 | 2.8x |
 | 16K | OOM | 42 | Enabled |
 **Note**: Inference speedup less dramatic than training because generation is memory-bound (KV cache accesses).
 ## Flash Attention versions comparison
 ### Flash Attention 1 vs 2 vs 3 (H100, seq=4096, batch=8)
 | Metric | FA1 | FA2 | FA3 (FP16) | FA3 (FP8) |
 |--------|-----|-----|------------|-----------|
 | Forward time (ms) | 28.4 | 12.5 | 7.2 | 4.8 |
 | Memory (GB) | 4.8 | 4.2 | 4.2 | 2.8 |
 | TFLOPS | 180 | 420 | 740 | 1150 |
 | GPU util % | 35% | 55% | 75% | 82% |
 **Key improvements**:
 - FA2: 2.3x faster than FA1 (better parallelism)
 - FA3 (FP16): 1.7x faster than FA2 (H100 async optimizations)
 - FA3 (FP8): 2.6x faster than FA2 (low precision)
 ### Features by version
 | Feature | FA1 | FA2 | FA3 |
 |---------|-----|-----|-----|
 | Basic attention | ✅ | ✅ | ✅ |
 | Causal masking | ✅ | ✅ | ✅ |
 | Multi-query attention | ❌ | ✅ | ✅ |
 | Sliding window | ❌ | ✅ | ✅ |
 | Paged KV cache | ❌ | ✅ | ✅ |
 | FP8 support | ❌ | ❌ | ✅ (H100 only) |
 | Work partitioning | Basic | Advanced | Optimal |
 ## Real-world model benchmarks
 ### Llama 2 models (A100 80GB, batch=4, seq=2048)
 | Model | Params | Standard (samples/sec) | Flash Attn (samples/sec) | Speedup |
 |-------|--------|------------------------|--------------------------|---------|
 | Llama 2 7B | 7B | 1.2 | 3.1 | 2.6x |
 | Llama 2 13B | 13B | 0.6 | 1.7 | 2.8x |
 | Llama 2 70B | 70B | 0.12 | 0.34 | 2.8x |
 ### GPT-style models (seq=1024)
 | Model | Standard (tokens/sec) | Flash Attn (tokens/sec) | Speedup |
 |-------|----------------------|-------------------------|---------|
 | GPT-2 (124M) | 520 | 680 | 1.3x |
 | GPT-J (6B) | 42 | 98 | 2.3x |
 | GPT-NeoX (20B) | 8 | 22 | 2.75x |
 ## Recommendations by use case
 **Training large models (>7B parameters)**:
 - Use Flash Attention 2 on A100
 - Use Flash Attention 3 FP8 on H100 for maximum speed
 - Expected: 2.5-3x speedup
 **Long context inference (>4K tokens)**:
 - Flash Attention essential (enables contexts standard attention can't handle)
 - Expected: 2-4x speedup, 5-10x memory reduction
 **Short sequences (<512 tokens)**:
 - Flash Attention provides 1.2-1.5x speedup
 - Minimal memory benefit
 - Still worth enabling (no downside)
 **Multi-user serving**:
 - Flash Attention reduces per-request memory
 - Allows higher concurrent batch sizes
 - Can serve 2-3x more users on same hardware
--- a/skills/mlops/flash-attention/references/transformers-integration.md
+++ b/skills/mlops/flash-attention/references/transformers-integration.md
@ -0,0 +1,293 @@
 # HuggingFace Transformers Integration
 ## Contents
 - Enabling Flash Attention in Transformers
 - Supported model architectures
 - Configuration examples
 - Performance comparisons
 - Troubleshooting model-specific issues
 ## Enabling Flash Attention in Transformers
 HuggingFace Transformers (v4.36+) supports Flash Attention 2 natively.
 **Simple enable for any supported model**:
 ```python
 import torch
 from transformers import AutoModel
 model = AutoModel.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.float16,
    device_map="auto"
 )
 ```
 **Install requirements**:
 ```bash
 pip install "transformers>=4.36"  # quoted so the shell does not treat >= as a redirect
 pip install flash-attn --no-build-isolation
 ```
 ## Supported model architectures
 As of Transformers 4.40:
 **Fully supported**:
 - Llama / Llama 2 / Llama 3
 - Mistral / Mixtral
 - Falcon
 - GPT-NeoX
 - Phi / Phi-2 / Phi-3
 - Qwen / Qwen2
 - Gemma
 - Starcoder2
 - GPT-J
 - OPT
 - BLOOM
 **Partially supported** (encoder-decoder):
 - BART
 - T5 / Flan-T5
 - Whisper
 **Check support**:
 ```python
 from transformers import AutoConfig
 config = AutoConfig.from_pretrained("model-name")
 print(config._attn_implementation_internal)
 # 'flash_attention_2' if supported
 ```
 ## Configuration examples
 ### Llama 2 with Flash Attention
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 model_id = "meta-llama/Llama-2-7b-hf"
 model = AutoModelForCausalLM.from_pretrained(
    model_id,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.float16,
    device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Generate
 inputs = tokenizer("Once upon a time", return_tensors="pt").to("cuda")
 outputs = model.generate(**inputs, max_length=100)
 print(tokenizer.decode(outputs[0]))
 ```
 ### Mistral with Flash Attention for long context
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
 model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,  # Better for long context
    device_map="auto",
    max_position_embeddings=32768  # Extended context
 )
 # Process long document (32K tokens)
 long_text = "..." * 10000
 inputs = tokenizer(long_text, return_tensors="pt", truncation=False).to("cuda")
 outputs = model.generate(**inputs, max_new_tokens=512)
 ```
 ### Fine-tuning with Flash Attention
 ```python
 import torch
 from transformers import Trainer, TrainingArguments
 from transformers import AutoModelForCausalLM
 model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16  # fp16 weights + fp16=True fails with "Attempting to unscale FP16 gradients"
 )
 training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    bf16=True,  # Must match model dtype
    optim="adamw_torch_fused"  # Fast optimizer
 )
 trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
 )
 trainer.train()
 ```
 ### Multi-GPU training
 ```python
 from transformers import AutoModelForCausalLM
 import torch
 # Model parallelism with Flash Attention
 model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-13b-hf",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.float16,
    device_map="auto",  # Automatic multi-GPU placement
    max_memory={0: "20GB", 1: "20GB"}  # Limit per GPU
 )
 ```
 ## Performance comparisons
 ### Memory usage (Llama 2 7B, batch=1)
 | Sequence Length | Standard Attention | Flash Attention 2 | Reduction |
 |-----------------|-------------------|-------------------|-----------|
 | 512 | 1.2 GB | 0.9 GB | 25% |
 | 2048 | 3.8 GB | 1.4 GB | 63% |
 | 8192 | 14.2 GB | 3.2 GB | 77% |
 | 32768 | OOM (>24GB) | 10.8 GB | Fits! |
 ### Speed (tokens/sec, A100 80GB)
 | Model | Standard | Flash Attn 2 | Speedup |
 |-------|----------|--------------|---------|
 | Llama 2 7B (seq=2048) | 42 | 118 | 2.8x |
 | Llama 2 13B (seq=4096) | 18 | 52 | 2.9x |
 | Llama 2 70B (seq=2048) | 4 | 11 | 2.75x |
 ### Training throughput (samples/sec)
 | Model | Batch Size | Standard | Flash Attn 2 | Speedup |
 |-------|------------|----------|--------------|---------|
 | Llama 2 7B | 4 | 1.2 | 3.1 | 2.6x |
 | Llama 2 7B | 8 | 2.1 | 5.8 | 2.8x |
 | Llama 2 13B | 2 | 0.6 | 1.7 | 2.8x |
 ## Troubleshooting model-specific issues
 ### Issue: Model doesn't support Flash Attention
 Check support list above. If not supported, use PyTorch SDPA as fallback:
 ```python
 model = AutoModelForCausalLM.from_pretrained(
    "model-name",
    attn_implementation="sdpa",  # PyTorch native (still faster)
    torch_dtype=torch.float16
 )
 ```
 ### Issue: CUDA out of memory during loading
 Reduce memory footprint:
 ```python
 model = AutoModelForCausalLM.from_pretrained(
    "model-name",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.float16,
    device_map="auto",
    max_memory={0: "18GB"},  # Reserve memory for KV cache
    low_cpu_mem_usage=True
 )
 ```
 ### Issue: Slower inference than expected
 Ensure dtype matches:
 ```python
 # Model and inputs must both be float16/bfloat16
 model = model.to(torch.float16)
 inputs = tokenizer(..., return_tensors="pt").to("cuda")
 inputs = {k: v.to(torch.float16) if v.dtype == torch.float32 else v
          for k, v in inputs.items()}
 ```
 ### Issue: Different outputs vs standard attention
 Flash Attention is numerically equivalent but uses different computation order. Small differences (<1e-3) are normal:
 ```python
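 # Compare outputs (both models must be on the same GPU as the inputs)
 model_standard = AutoModelForCausalLM.from_pretrained(
    "model-name",
    attn_implementation="eager",  # Standard attention as the baseline
    torch_dtype=torch.float16
 ).to("cuda")
 model_flash = AutoModelForCausalLM.from_pretrained(
    "model-name",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.float16
 ).to("cuda")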
 inputs = tokenizer("Test", return_tensors="pt").to("cuda")
 with torch.no_grad():
    out_standard = model_standard(**inputs).logits
    out_flash = model_flash(**inputs).logits
 diff = (out_standard - out_flash).abs().max()
 print(f"Max diff: {diff:.6f}")  # Should be ~1e-3 to 1e-4
 ```
 ### Issue: ImportError during model loading
 Install flash-attn:
 ```bash
 pip install flash-attn --no-build-isolation
 ```
 Or disable Flash Attention:
 ```python
 model = AutoModelForCausalLM.from_pretrained(
    "model-name",
    attn_implementation="eager",  # Standard PyTorch
    torch_dtype=torch.float16
 )
 ```
 ## Best practices
 1. **Always use float16/bfloat16** with Flash Attention (not float32)
 2. **Set device_map="auto"** for automatic memory management
 3. **Use bfloat16 for long context** (better numerical stability)
 4. **Enable gradient checkpointing** for training large models
 5. **Monitor memory** with `torch.cuda.max_memory_allocated()`
 **Example with all best practices**:
 ```python
 import torch
 from transformers import AutoModelForCausalLM, TrainingArguments
 model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,  # Better for training
    device_map="auto",
    low_cpu_mem_usage=True
 )
 # Enable gradient checkpointing for memory
 model.gradient_checkpointing_enable()
 # Training with optimizations
 training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    bf16=True,  # Match model dtype
    optim="adamw_torch_fused",
    gradient_checkpointing=True
 )
 ```
--- a/skills/mlops/gguf/SKILL.md
+++ b/skills/mlops/gguf/SKILL.md
@ -0,0 +1,427 @@
 ---
 name: gguf-quantization
 description: GGUF format and llama.cpp quantization for efficient CPU/GPU inference. Use when deploying models on consumer hardware, Apple Silicon, or when needing flexible quantization from 2-8 bit without GPU requirements.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [GGUF, Quantization, llama.cpp, CPU Inference, Apple Silicon, Model Compression, Optimization]
 dependencies: [llama-cpp-python>=0.2.0]
 ---
 # GGUF - Quantization Format for llama.cpp
 GGUF (GPT-Generated Unified Format) is the standard file format for llama.cpp, enabling efficient inference on CPUs, Apple Silicon, and GPUs with flexible quantization options.
 ## When to use GGUF
 **Use GGUF when:**
 - Deploying on consumer hardware (laptops, desktops)
 - Running on Apple Silicon (M1/M2/M3) with Metal acceleration
 - Need CPU inference without GPU requirements
 - Want flexible quantization (Q2_K to Q8_0)
 - Using local AI tools (LM Studio, Ollama, text-generation-webui)
 **Key advantages:**
 - **Universal hardware**: CPU, Apple Silicon, NVIDIA, AMD support
 - **No Python runtime**: Pure C/C++ inference
 - **Flexible quantization**: 2-8 bit with various methods (K-quants)
 - **Ecosystem support**: LM Studio, Ollama, koboldcpp, and more
 - **imatrix**: Importance matrix for better low-bit quality
 **Use alternatives instead:**
 - **AWQ/GPTQ**: Maximum accuracy with calibration on NVIDIA GPUs
 - **HQQ**: Fast calibration-free quantization for HuggingFace
 - **bitsandbytes**: Simple integration with transformers library
 - **TensorRT-LLM**: Production NVIDIA deployment with maximum speed
 ## Quick start
 ### Installation
 ```bash
 # Clone llama.cpp
 git clone https://github.com/ggml-org/llama.cpp
 cd llama.cpp
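 # Build (CPU) - llama.cpp builds with CMake (the Makefile build was removed)
 cmake -B build
 cmake --build build --config Release
 # Build with CUDA (NVIDIA)
 cmake -B build -DGGML_CUDA=ON
 cmake --build build --config Release
 # Build with Metal (Apple Silicon; enabled by default on macOS)
 cmake -B build -DGGML_METAL=ON
 cmake --build build --config Release
 # Binaries are written to build/bin/ - run the ./llama-* commands below from
 # that directory (or prefix them with build/bin/)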
 # Install Python bindings (optional)
 pip install llama-cpp-python
 ```
 ### Convert model to GGUF
 ```bash
 # Install requirements
 pip install -r requirements.txt
 # Convert HuggingFace model to GGUF (FP16)
 python convert_hf_to_gguf.py ./path/to/model --outfile model-f16.gguf
 # Or specify output type
 python convert_hf_to_gguf.py ./path/to/model \
    --outfile model-f16.gguf \
    --outtype f16
 ```
 ### Quantize model
 ```bash
 # Basic quantization to Q4_K_M
 ./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
 # Quantize with importance matrix (better quality)
 ./llama-imatrix -m model-f16.gguf -f calibration.txt -o model.imatrix
 ./llama-quantize --imatrix model.imatrix model-f16.gguf model-q4_k_m.gguf Q4_K_M
 ```
 ### Run inference
 ```bash
 # CLI inference
 ./llama-cli -m model-q4_k_m.gguf -p "Hello, how are you?"
 # Interactive mode
 ./llama-cli -m model-q4_k_m.gguf --interactive
 # With GPU offload
 ./llama-cli -m model-q4_k_m.gguf -ngl 35 -p "Hello!"
 ```
 ## Quantization types
 ### K-quant methods (recommended)
 | Type | Bits | Size (7B) | Quality | Use Case |
 |------|------|-----------|---------|----------|
 | Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression |
 | Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
 | Q3_K_M | 3.3 | ~3.3 GB | Medium | Balance |
 | Q4_K_S | 4.0 | ~3.8 GB | Med-High | Good balance |
 | Q4_K_M | 4.5 | ~4.1 GB | High | **Recommended default** |
 | Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
 | Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
 | Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
 | Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality |
 ### Legacy methods
 | Type | Description |
 |------|-------------|
 | Q4_0 | 4-bit, per-block scale only |
 | Q4_1 | 4-bit, per-block scale and minimum |
 | Q5_0 | 5-bit, per-block scale only |
 | Q5_1 | 5-bit, per-block scale and minimum |
 **Recommendation**: Use K-quant methods (Q4_K_M, Q5_K_M) for best quality/size ratio.
 ## Conversion workflows
 ### Workflow 1: HuggingFace to GGUF
 ```bash
 # 1. Download model
 huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
 # 2. Convert to GGUF (FP16)
 python convert_hf_to_gguf.py ./llama-3.1-8b \
    --outfile llama-3.1-8b-f16.gguf \
    --outtype f16
 # 3. Quantize
 ./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
 # 4. Test
 ./llama-cli -m llama-3.1-8b-q4_k_m.gguf -p "Hello!" -n 50
 ```
 ### Workflow 2: With importance matrix (better quality)
 ```bash
 # 1. Convert to GGUF
 python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
 # 2. Create calibration text (diverse samples)
 cat > calibration.txt << 'EOF'
 The quick brown fox jumps over the lazy dog.
 Machine learning is a subset of artificial intelligence.
 Python is a popular programming language.
 # Add more diverse text samples...
 EOF
 # 3. Generate importance matrix
 ./llama-imatrix -m model-f16.gguf \
    -f calibration.txt \
    -c 512 \
    -o model.imatrix \
    -ngl 35  # GPU layers if available
 # 4. Quantize with imatrix
 ./llama-quantize --imatrix model.imatrix \
    model-f16.gguf \
    model-q4_k_m.gguf \
    Q4_K_M
 ```
 ### Workflow 3: Multiple quantizations
 ```bash
 #!/bin/bash
 MODEL="llama-3.1-8b-f16.gguf"
 IMATRIX="llama-3.1-8b.imatrix"
 # Generate imatrix once
 ./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
 # Create multiple quantizations
 for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
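    # tr instead of ${QUANT,,}: macOS ships bash 3.2, which lacks that expansion
    OUTPUT="llama-3.1-8b-$(echo "$QUANT" | tr '[:upper:]' '[:lower:]').gguf"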
    ./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
    echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
 done
 ```
 ## Python usage
 ### llama-cpp-python
 ```python
 from llama_cpp import Llama
 # Load model
 llm = Llama(
    model_path="./model-q4_k_m.gguf",
    n_ctx=4096,          # Context window
    n_gpu_layers=35,     # GPU offload (0 for CPU only)
    n_threads=8          # CPU threads
 )
 # Generate
 output = llm(
    "What is machine learning?",
    max_tokens=256,
    temperature=0.7,
    stop=["</s>", "\n\n"]
 )
 print(output["choices"][0]["text"])
 ```
 ### Chat completion
 ```python
 from llama_cpp import Llama
 llm = Llama(
    model_path="./model-q4_k_m.gguf",
    n_ctx=4096,
    n_gpu_layers=35,
    chat_format="llama-3"  # Or "chatml", "mistral", etc.
 )
 messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is Python?"}
 ]
 response = llm.create_chat_completion(
    messages=messages,
    max_tokens=256,
    temperature=0.7
 )
 print(response["choices"][0]["message"]["content"])
 ```
 ### Streaming
 ```python
 from llama_cpp import Llama
 llm = Llama(model_path="./model-q4_k_m.gguf", n_gpu_layers=35)
 # Stream tokens
 for chunk in llm(
    "Explain quantum computing:",
    max_tokens=256,
    stream=True
 ):
    print(chunk["choices"][0]["text"], end="", flush=True)
 ```
 ## Server mode
 ### Start OpenAI-compatible server
 ```bash
 # Start server
 ./llama-server -m model-q4_k_m.gguf \
    --host 0.0.0.0 \
    --port 8080 \
    -ngl 35 \
    -c 4096
 # Or with Python bindings
 python -m llama_cpp.server \
    --model model-q4_k_m.gguf \
    --n_gpu_layers 35 \
    --host 0.0.0.0 \
    --port 8080
 ```
 ### Use with OpenAI client
 ```python
 from openai import OpenAI
 client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="not-needed"
 )
 response = client.chat.completions.create(
    model="local-model",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=256
 )
 print(response.choices[0].message.content)
 ```
 ## Hardware optimization
 ### Apple Silicon (Metal)
 ```bash
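 # Build with Metal (on by default on macOS; binaries are written to build/bin/)
 cmake -B build -DGGML_METAL=ON && cmake --build build --config Release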
 # Run with Metal acceleration
 ./llama-cli -m model.gguf -ngl 99 -p "Hello"
 ```
 ```python
 # Python with Metal
 from llama_cpp import Llama
 llm = Llama(
    model_path="model.gguf",
    n_gpu_layers=99,     # Offload all layers
    n_threads=1          # Metal handles parallelism
 )
 ```
 ### NVIDIA CUDA
 ```bash
 # Build with CUDA (binaries are written to build/bin/)
 cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release
 # Run with CUDA
 ./llama-cli -m model.gguf -ngl 35 -p "Hello"
 # Specify GPU
 CUDA_VISIBLE_DEVICES=0 ./llama-cli -m model.gguf -ngl 35
 ```
 ### CPU optimization
 ```bash
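 # Build with AVX2/AVX512 (GGML_NATIVE tunes for the host CPU; binaries are written to build/bin/)
 cmake -B build -DGGML_NATIVE=ON && cmake --build build --config Release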
 # Run with optimal threads
 ./llama-cli -m model.gguf -t 8 -p "Hello"
 ```
 ```python
 # Python CPU config
 from llama_cpp import Llama
 llm = Llama(
    model_path="model.gguf",
    n_gpu_layers=0,      # CPU only
    n_threads=8,         # Match physical cores
    n_batch=512          # Batch size for prompt processing
 )
 ```
 ## Integration with tools
 ### Ollama
 ```bash
 # Create Modelfile
 cat > Modelfile << 'EOF'
 FROM ./model-q4_k_m.gguf
 TEMPLATE """{{ .System }}
 {{ .Prompt }}"""
 PARAMETER temperature 0.7
 PARAMETER num_ctx 4096
 EOF
 # Create Ollama model
 ollama create mymodel -f Modelfile
 # Run
 ollama run mymodel "Hello!"
 ```
 ### LM Studio
 1. Place GGUF file in `~/.cache/lm-studio/models/`
 2. Open LM Studio and select the model
 3. Configure context length and GPU offload
 4. Start inference
 ### text-generation-webui
 ```bash
 # Place in models folder
 cp model-q4_k_m.gguf text-generation-webui/models/
 # Start with llama.cpp loader
 python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
 ```
 ## Best practices
 1. **Use K-quants**: Q4_K_M offers best quality/size balance
 2. **Use imatrix**: Always use importance matrix for Q4 and below
 3. **GPU offload**: Offload as many layers as VRAM allows
 4. **Context length**: Start with 4096, increase if needed
 5. **Thread count**: Match physical CPU cores, not logical
 6. **Batch size**: Increase n_batch for faster prompt processing
 ## Common issues
 **Model loads slowly:**
 ```bash
 # Use mmap for faster loading
 ./llama-cli -m model.gguf --mmap
 ```
 **Out of memory:**
 ```bash
 # Reduce GPU layers
 ./llama-cli -m model.gguf -ngl 20  # Reduce from 35
 # Or use smaller quantization
 ./llama-quantize model-f16.gguf model-q3_k_m.gguf Q3_K_M
 ```
 **Poor quality at low bits:**
 ```bash
 # Always use imatrix for Q4 and below
 ./llama-imatrix -m model-f16.gguf -f calibration.txt -o model.imatrix
 ./llama-quantize --imatrix model.imatrix model-f16.gguf model-q4_k_m.gguf Q4_K_M
 ```
 ## References
 - **[Advanced Usage](references/advanced-usage.md)** - Batching, speculative decoding, custom builds
 - **[Troubleshooting](references/troubleshooting.md)** - Common issues, debugging, benchmarks
 ## Resources
 - **Repository**: https://github.com/ggml-org/llama.cpp
 - **Python Bindings**: https://github.com/abetlen/llama-cpp-python
 - **Pre-quantized Models**: https://huggingface.co/TheBloke
 - **GGUF Converter**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
 - **License**: MIT
--- a/skills/mlops/gguf/references/advanced-usage.md
+++ b/skills/mlops/gguf/references/advanced-usage.md
@ -0,0 +1,504 @@
 # GGUF Advanced Usage Guide
 ## Speculative Decoding
 ### Draft Model Approach
 ```bash
 # Use smaller model as draft for faster generation
 ./llama-speculative \
    -m large-model-q4_k_m.gguf \
    -md draft-model-q4_k_m.gguf \
    -p "Write a story about AI" \
    -n 500 \
    --draft 8  # Draft tokens before verification
 ```
 ### Self-Speculative Decoding
 ```bash
 # Use same model with different context for speculation
 ./llama-cli -m model-q4_k_m.gguf \
    --lookup-cache-static lookup.bin \
    --lookup-cache-dynamic lookup-dynamic.bin \
    -p "Hello world"
 ```
 ## Batched Inference
 ### Process Multiple Prompts
 ```python
 from llama_cpp import Llama
 llm = Llama(
    model_path="model-q4_k_m.gguf",
    n_ctx=4096,
    n_gpu_layers=35,
    n_batch=512  # Larger batch for parallel processing
 )
 prompts = [
    "What is Python?",
    "Explain machine learning.",
    "Describe neural networks."
 ]
 # Process prompts sequentially (one call per prompt; this loop does not run them in parallel)
 for prompt in prompts:
    output = llm(prompt, max_tokens=100)
    print(f"Q: {prompt}")
    print(f"A: {output['choices'][0]['text']}\n")
 ```
 ### Server Batching
 ```bash
 # Start server with batching
 # --parallel: number of concurrent request slots; --cont-batching: continuous batching
 ./llama-server -m model-q4_k_m.gguf \
    --host 0.0.0.0 \
    --port 8080 \
    -ngl 35 \
    -c 4096 \
    --parallel 4 \
    --cont-batching
 ```
 ## Custom Model Conversion
 ### Convert with Vocabulary Modifications
 ```python
 # custom_convert.py
 import sys
 sys.path.insert(0, './llama.cpp')
 from convert_hf_to_gguf import main
 from gguf import GGUFWriter
 # Custom conversion with modified vocab
 def convert_with_custom_vocab(model_path, output_path):
    # Load and modify tokenizer
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Add special tokens if needed
    special_tokens = {"additional_special_tokens": ["<|custom|>"]}
    tokenizer.add_special_tokens(special_tokens)
    tokenizer.save_pretrained(model_path)
    # Then run standard conversion (main() takes no arguments; it parses sys.argv)
    sys.argv = ["convert_hf_to_gguf.py", model_path, "--outfile", output_path]
    main()
 ```
 ### Convert Specific Architecture
 ```bash
 # For Mistral-style models
 python convert_hf_to_gguf.py ./mistral-model \
    --outfile mistral-f16.gguf \
    --outtype f16
 # For Qwen models
 python convert_hf_to_gguf.py ./qwen-model \
    --outfile qwen-f16.gguf \
    --outtype f16
 # For Phi models
 python convert_hf_to_gguf.py ./phi-model \
    --outfile phi-f16.gguf \
    --outtype f16
 ```
 ## Advanced Quantization
 ### Mixed Quantization
 ```bash
 # Quantize different layer types differently
 ./llama-quantize model-f16.gguf model-mixed.gguf Q4_K_M \
    --allow-requantize \
    --leave-output-tensor
 ```
 ### Quantization with Token Embeddings
 ```bash
 # Keep embeddings at higher precision
 ./llama-quantize model-f16.gguf model-q4.gguf Q4_K_M \
    --token-embedding-type f16
 ```
 ### IQ Quantization (Importance-aware)
 ```bash
 # Ultra-low bit quantization with importance
 ./llama-quantize --imatrix model.imatrix \
    model-f16.gguf model-iq2_xxs.gguf IQ2_XXS
 # Available IQ types: IQ2_XXS, IQ2_XS, IQ2_S, IQ3_XXS, IQ3_XS, IQ3_S, IQ4_XS
 ```
 ## Memory Optimization
 ### Memory Mapping
 ```python
 from llama_cpp import Llama
 # Use memory mapping for large models
 llm = Llama(
    model_path="model-q4_k_m.gguf",
    use_mmap=True,       # Memory map the model
    use_mlock=False,     # Don't lock in RAM
    n_gpu_layers=35
 )
 ```
 ### Partial GPU Offload
 ```python
 # Calculate layers to offload based on VRAM
 import subprocess
 def get_free_vram_gb():
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,nounits,noheader'],
        capture_output=True, text=True
    )
    # nvidia-smi prints one line per GPU (values in MiB); use the first GPU
    return int(result.stdout.splitlines()[0]) / 1024
 # Estimate layers based on VRAM (rough: 0.5GB per layer for 7B Q4)
 free_vram = get_free_vram_gb()
 layers_to_offload = int(free_vram / 0.5)
 llm = Llama(
    model_path="model-q4_k_m.gguf",
    n_gpu_layers=min(layers_to_offload, 35)  # Cap at total layers
 )
 ```
 ### KV Cache Optimization
 ```python
 from llama_cpp import Llama
 # Optimize KV cache for long contexts
 llm = Llama(
    model_path="model-q4_k_m.gguf",
    n_ctx=8192,          # Large context
    n_gpu_layers=35,
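    flash_attn=True,     # Required by llama.cpp for a quantized V cache
    type_k=8,            # GGML_TYPE_Q8_0 for K cache
    type_v=8,            # GGML_TYPE_Q8_0 for V cache
    # Or use GGML_TYPE_Q4_0 (2) for more compression; 1 is F16 (the default)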
 )
 ```
 ## Context Management
 ### Context Shifting
 ```python
 from llama_cpp import Llama
 llm = Llama(
    model_path="model-q4_k_m.gguf",
    n_ctx=4096,
    n_gpu_layers=35
 )
 # Handle long conversations with context shifting
 conversation = []
 max_history = 10
 def chat(user_message):
    conversation.append({"role": "user", "content": user_message})
    # Keep only recent history (trim in place: rebinding `conversation` here
    # would make it a local variable and raise UnboundLocalError)
    if len(conversation) > max_history * 2:
        del conversation[:-max_history * 2]
    response = llm.create_chat_completion(
        messages=conversation,
        max_tokens=256
    )
    assistant_message = response["choices"][0]["message"]["content"]
    conversation.append({"role": "assistant", "content": assistant_message})
    return assistant_message
 ```
 ### Save and Load State
 ```bash
 # Save state to file
 ./llama-cli -m model.gguf \
    -p "Once upon a time" \
    --save-session session.bin \
    -n 100
 # Load and continue
 ./llama-cli -m model.gguf \
    --load-session session.bin \
    -p " and they lived" \
    -n 100
 ```
 ## Grammar Constrained Generation
 ### JSON Output
 ```python
 from llama_cpp import Llama, LlamaGrammar
 # Define JSON grammar
 json_grammar = LlamaGrammar.from_string('''
 root ::= object
 object ::= "{" ws pair ("," ws pair)* "}" ws
 pair ::= string ":" ws value
 value ::= string | number | object | array | "true" | "false" | "null"
 array ::= "[" ws value ("," ws value)* "]" ws
 string ::= "\\"" [^"\\\\]* "\\""
 number ::= [0-9]+
 ws ::= [ \\t\\n]*
 ''')
 llm = Llama(model_path="model-q4_k_m.gguf", n_gpu_layers=35)
 output = llm(
    "Output a JSON object with name and age:",
    grammar=json_grammar,
    max_tokens=100
 )
 print(output["choices"][0]["text"])
 ```
 ### Custom Grammar
 ```python
 # Grammar for specific format
 answer_grammar = LlamaGrammar.from_string('''
 root ::= "Answer: " letter "\\n" "Explanation: " explanation
 letter ::= [A-D]
 explanation ::= [a-zA-Z0-9 .,!?]+
 ''')
 output = llm(
    "Q: What is 2+2? A) 3 B) 4 C) 5 D) 6",
    grammar=answer_grammar,
    max_tokens=100
 )
 ```
 ## LoRA Integration
 ### Load LoRA Adapter
 ```bash
 # Apply LoRA at runtime
 ./llama-cli -m base-model-q4_k_m.gguf \
    --lora lora-adapter.gguf \
    --lora-scale 1.0 \
    -p "Hello!"
 ```
 ### Multiple LoRA Adapters
 ```bash
 # Stack multiple adapters
 ./llama-cli -m base-model.gguf \
    --lora adapter1.gguf --lora-scale 0.5 \
    --lora adapter2.gguf --lora-scale 0.5 \
    -p "Hello!"
 ```
 ### Python LoRA Usage
 ```python
 from llama_cpp import Llama
 llm = Llama(
    model_path="base-model-q4_k_m.gguf",
    lora_path="lora-adapter.gguf",
    lora_scale=1.0,
    n_gpu_layers=35
 )
 ```
 ## Embedding Generation
 ### Extract Embeddings
 ```python
 from llama_cpp import Llama
 llm = Llama(
    model_path="model-q4_k_m.gguf",
    embedding=True,      # Enable embedding mode
    n_gpu_layers=35
 )
 # Get embeddings
 embeddings = llm.embed("This is a test sentence.")
 print(f"Embedding dimension: {len(embeddings)}")
 ```
 ### Batch Embeddings
 ```python
 texts = [
    "Machine learning is fascinating.",
    "Deep learning uses neural networks.",
    "Python is a programming language."
 ]
 embeddings = [llm.embed(text) for text in texts]
 # Calculate similarity
 import numpy as np
 def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``."""
    dot_product = np.dot(a, b)
    magnitude = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / magnitude
 sim = cosine_similarity(embeddings[0], embeddings[1])
 print(f"Similarity: {sim:.4f}")
 ```
 ## Performance Tuning
 ### Benchmark Script
 ```python
 import time
 from llama_cpp import Llama
 def benchmark(model_path, prompt, n_tokens=100, n_runs=5):
    """Time ``n_runs`` generations of ``prompt`` and report throughput.

    Loads the model at ``model_path``, performs one short warmup call, then
    measures the wall-clock time of each run. Prints the model path, the mean
    run time and the derived tokens/sec, and returns the tokens/sec figure.
    The rate is computed as ``n_tokens`` divided by the mean run time.
    """
    llm = Llama(
        model_path=model_path,
        n_gpu_layers=35,
        n_ctx=2048,
        verbose=False
    )
    # Warmup call so one-time setup cost is not included in the timings
    llm(prompt, max_tokens=10)
    # Timed runs
    durations = []
    for _ in range(n_runs):
        began = time.time()
        llm(prompt, max_tokens=n_tokens)
        durations.append(time.time() - began)
    avg_time = sum(durations) / len(durations)
    tokens_per_sec = n_tokens / avg_time
    print(f"Model: {model_path}")
    print(f"Avg time: {avg_time:.2f}s")
    print(f"Tokens/sec: {tokens_per_sec:.1f}")
    return tokens_per_sec
 # Compare quantizations
 for quant in ["q4_k_m", "q5_k_m", "q8_0"]:
    benchmark(f"model-{quant}.gguf", "Explain quantum computing:", 100)
 ```
 ### Optimal Configuration Finder
 ```python
 def find_optimal_config(model_path, target_vram_gb=8):
    """Search n_gpu_layers / n_batch combinations and return the fastest one.

    For each ``n_gpu_layers`` in ``range(0, 50, 5)`` and each ``n_batch`` in
    ``[128, 256, 512, 1024]`` the model is loaded, a 50-token generation is
    timed, and the fastest configuration seen so far is remembered.

    Args:
        model_path: Path to the GGUF model file.
        target_vram_gb: Currently unused; kept for interface compatibility.
            VRAM limits are only detected indirectly, when a load or
            generation fails.

    Returns:
        A dict with keys ``"n_gpu_layers"``, ``"n_batch"`` and ``"speed"``
        (tokens/sec), or ``None`` if no configuration ran successfully.
    """
    from llama_cpp import Llama
    import gc
    import time  # imported locally so this snippet is self-contained
    best_config = None
    best_speed = 0
    for n_gpu_layers in range(0, 50, 5):
        for n_batch in [128, 256, 512, 1024]:
            try:
                gc.collect()
                llm = Llama(
                    model_path=model_path,
                    n_gpu_layers=n_gpu_layers,
                    n_batch=n_batch,
                    n_ctx=2048,
                    verbose=False
                )
                # Quick benchmark
                start = time.time()
                llm("Hello", max_tokens=50)
                speed = 50 / (time.time() - start)
                if speed > best_speed:
                    best_speed = speed
                    best_config = {
                        "n_gpu_layers": n_gpu_layers,
                        "n_batch": n_batch,
                        "speed": speed
                    }
                del llm
                gc.collect()
            except Exception as e:
                # Likely out of memory, but report the real error so other
                # failures (bad path, unsupported model) are not mislabeled.
                print(f"Failed at layers={n_gpu_layers}, batch={n_batch}: {e}")
                # Skip the remaining (larger) batch sizes for this layer count
                break
    return best_config
 ```
 ## Multi-GPU Setup
 ### Distribute Across GPUs
 ```bash
 # Split model across multiple GPUs
 ./llama-cli -m large-model.gguf \
    --tensor-split 0.5,0.5 \
    -ngl 60 \
    -p "Hello!"
 ```
 ### Python Multi-GPU
 ```python
 import os
 os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
 from llama_cpp import Llama
 llm = Llama(
    model_path="large-model-q4_k_m.gguf",
    n_gpu_layers=60,
    tensor_split=[0.5, 0.5]  # Split evenly across 2 GPUs
 )
 ```
 ## Custom Builds
 ### Build with All Optimizations
 ```bash
 # Clean build with all CPU optimizations
 make clean
 LLAMA_OPENBLAS=1 LLAMA_BLAS_VENDOR=OpenBLAS make -j
 # With CUDA and cuBLAS
 make clean
 GGML_CUDA=1 LLAMA_CUBLAS=1 make -j
 # With specific CUDA architecture
 GGML_CUDA=1 CUDA_DOCKER_ARCH=sm_86 make -j
 ```
 ### CMake Build
 ```bash
 mkdir build && cd build
 cmake .. -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE=Release
 cmake --build . --config Release -j
 ```
--- a/skills/mlops/gguf/references/troubleshooting.md
+++ b/skills/mlops/gguf/references/troubleshooting.md
@ -0,0 +1,442 @@
 # GGUF Troubleshooting Guide
 ## Installation Issues
 ### Build Fails
 **Error**: `make: *** No targets specified and no makefile found`
 **Fix**:
 ```bash
 # Ensure you're in llama.cpp directory
 cd llama.cpp
 make
 ```
 **Error**: `fatal error: cuda_runtime.h: No such file or directory`
 **Fix**:
 ```bash
 # Install CUDA toolkit
 # Ubuntu
 sudo apt install nvidia-cuda-toolkit
 # Or set CUDA path
 export CUDA_PATH=/usr/local/cuda
 export PATH=$CUDA_PATH/bin:$PATH
 make GGML_CUDA=1
 ```
 ### Python Bindings Issues
 **Error**: `ERROR: Failed building wheel for llama-cpp-python`
 **Fix**:
 ```bash
 # Install build dependencies
 pip install cmake scikit-build-core
 # For CUDA support
 CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
 # For Metal (macOS)
 CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
 ```
 **Error**: `ImportError: libcudart.so.XX: cannot open shared object file`
 **Fix**:
 ```bash
 # Add CUDA libraries to path
 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 # Or reinstall with correct CUDA version
 pip uninstall llama-cpp-python
 CUDACXX=/usr/local/cuda/bin/nvcc CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 ```
 ## Conversion Issues
 ### Model Not Supported
 **Error**: `KeyError: 'model.embed_tokens.weight'`
 **Fix**:
 ```bash
 # Check model architecture
 python -c "from transformers import AutoConfig; print(AutoConfig.from_pretrained('./model').architectures)"
 # Use appropriate conversion script
 # For most models:
 python convert_hf_to_gguf.py ./model --outfile model.gguf
 # For older models, check if legacy script needed
 ```
 ### Vocabulary Mismatch
 **Error**: `RuntimeError: Vocabulary size mismatch`
 **Fix**:
 ```python
 # Ensure tokenizer matches model
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("./model")
 model = AutoModelForCausalLM.from_pretrained("./model")
 print(f"Tokenizer vocab size: {len(tokenizer)}")
 print(f"Model vocab size: {model.config.vocab_size}")
 # If mismatch, resize embeddings before conversion
 model.resize_token_embeddings(len(tokenizer))
 model.save_pretrained("./model-fixed")
 ```
 ### Out of Memory During Conversion
 **Error**: `torch.cuda.OutOfMemoryError` during conversion
 **Fix**:
 ```bash
 # Use CPU for conversion
 CUDA_VISIBLE_DEVICES="" python convert_hf_to_gguf.py ./model --outfile model.gguf
 # Or use low memory mode
 python convert_hf_to_gguf.py ./model --outfile model.gguf --outtype f16
 ```
 ## Quantization Issues
 ### Wrong Output File Size
 **Problem**: Quantized file is larger than expected
 **Check**:
 ```bash
 # Verify quantization type
 ./llama-cli -m model.gguf --verbose
 # Expected sizes for 7B model:
 # Q4_K_M: ~4.1 GB
 # Q5_K_M: ~4.8 GB
 # Q8_0: ~7.2 GB
 # F16: ~13.5 GB
 ```
 ### Quantization Crashes
 **Error**: `Segmentation fault` during quantization
 **Fix**:
 ```bash
 # Increase stack size
 ulimit -s unlimited
 # Or use fewer threads (thread count is the optional last positional argument)
 ./llama-quantize model-f16.gguf model-q4.gguf Q4_K_M 4
 ```
 ### Poor Quality After Quantization
 **Problem**: Model outputs gibberish after quantization
 **Solutions**:
 1. **Use importance matrix**:
 ```bash
 # Generate imatrix with good calibration data
 ./llama-imatrix -m model-f16.gguf \
    -f wiki_sample.txt \
    --chunk 512 \
    -o model.imatrix
 # Quantize with imatrix
 ./llama-quantize --imatrix model.imatrix \
    model-f16.gguf model-q4_k_m.gguf Q4_K_M
 ```
 2. **Try higher precision**:
 ```bash
 # Use Q5_K_M or Q6_K instead of Q4
 ./llama-quantize model-f16.gguf model-q5_k_m.gguf Q5_K_M
 ```
 3. **Check original model**:
 ```bash
 # Test FP16 version first
 ./llama-cli -m model-f16.gguf -p "Hello, how are you?" -n 50
 ```
 ## Inference Issues
 ### Slow Generation
 **Problem**: Generation is slower than expected
 **Solutions**:
 1. **Enable GPU offload**:
 ```bash
 ./llama-cli -m model.gguf -ngl 35 -p "Hello"
 ```
 2. **Optimize batch size**:
 ```python
 llm = Llama(
    model_path="model.gguf",
    n_batch=512,        # Increase for faster prompt processing
    n_gpu_layers=35
 )
 ```
 3. **Use appropriate threads**:
 ```bash
 # Match physical cores, not logical
 ./llama-cli -m model.gguf -t 8 -p "Hello"
 ```
 4. **Enable Flash Attention** (if supported):
 ```bash
 ./llama-cli -m model.gguf -ngl 35 --flash-attn -p "Hello"
 ```
 ### Out of Memory
 **Error**: `CUDA out of memory` or system freeze
 **Solutions**:
 1. **Reduce GPU layers**:
 ```python
 # Start low and increase
 llm = Llama(model_path="model.gguf", n_gpu_layers=10)
 ```
 2. **Use smaller quantization**:
 ```bash
 ./llama-quantize model-f16.gguf model-q3_k_m.gguf Q3_K_M
 ```
 3. **Reduce context length**:
 ```python
 llm = Llama(
    model_path="model.gguf",
    n_ctx=2048,  # Reduce from 4096
    n_gpu_layers=35
 )
 ```
 4. **Quantize KV cache**:
 ```python
 llm = Llama(
    model_path="model.gguf",
    type_k=2,    # Q4_0 for K cache
    type_v=2,    # Q4_0 for V cache
    n_gpu_layers=35
 )
 ```
 ### Garbage Output
 **Problem**: Model outputs random characters or nonsense
 **Diagnose**:
 ```python
 # Check model loading
 llm = Llama(model_path="model.gguf", verbose=True)
 # Test with simple prompt
 output = llm("1+1=", max_tokens=5, temperature=0)
 print(output)
 ```
 **Solutions**:
 1. **Check model integrity**:
 ```bash
 # Verify GGUF file
 ./llama-cli -m model.gguf --verbose 2>&1 | head -50
 ```
 2. **Use correct chat format**:
 ```python
 llm = Llama(
    model_path="model.gguf",
    chat_format="llama-3"  # Match your model: chatml, mistral, etc.
 )
 ```
 3. **Check temperature**:
 ```python
 # Use lower temperature for deterministic output
 output = llm("Hello", max_tokens=50, temperature=0.1)
 ```
 ### Token Issues
 **Error**: `RuntimeError: unknown token` or encoding errors
 **Fix**:
 ```python
 # Ensure UTF-8 encoding
 prompt = "Hello, world!".encode('utf-8').decode('utf-8')
 output = llm(prompt, max_tokens=50)
 ```
 ## Server Issues
 ### Connection Refused
 **Error**: `Connection refused` when accessing server
 **Fix**:
 ```bash
 # Bind to all interfaces
 ./llama-server -m model.gguf --host 0.0.0.0 --port 8080
 # Check if port is in use
 lsof -i :8080
 ```
 ### Server Crashes Under Load
 **Problem**: Server crashes with multiple concurrent requests
 **Solutions**:
 1. **Limit parallelism**:
 ```bash
 ./llama-server -m model.gguf \
    --parallel 2 \
    -c 4096 \
    --cont-batching
 ```
 2. **Add request timeout**:
 ```bash
 ./llama-server -m model.gguf --timeout 300
 ```
 3. **Monitor memory**:
 ```bash
 watch -n 1 nvidia-smi  # For GPU
 watch -n 1 free -h     # For RAM
 ```
 ### API Compatibility Issues
 **Problem**: OpenAI client not working with server
 **Fix**:
 ```python
 from openai import OpenAI
 # Use correct base URL format
 client = OpenAI(
    base_url="http://localhost:8080/v1",  # Include /v1
    api_key="not-needed"
 )
 # Use correct model name
 response = client.chat.completions.create(
    model="local",  # Or the actual model name
    messages=[{"role": "user", "content": "Hello"}]
 )
 ```
 ## Apple Silicon Issues
 ### Metal Not Working
 **Problem**: Metal acceleration not enabled
 **Check**:
 ```bash
 # Verify Metal support
 ./llama-cli -m model.gguf --verbose 2>&1 | grep -i metal
 ```
 **Fix**:
 ```bash
 # Rebuild with Metal
 make clean
 make GGML_METAL=1
 # Python bindings
 CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --force-reinstall
 ```
 ### Incorrect Memory Usage on M1/M2
 **Problem**: Model uses too much unified memory
 **Fix**:
 ```python
 # Offload all layers for Metal
 llm = Llama(
    model_path="model.gguf",
    n_gpu_layers=99,    # Offload everything
    n_threads=1         # Metal handles parallelism
 )
 ```
 ## Debugging
 ### Enable Verbose Output
 ```bash
 # CLI verbose mode
 ./llama-cli -m model.gguf --verbose -p "Hello" -n 50
 ```
 ```python
 # Python verbose
 llm = Llama(model_path="model.gguf", verbose=True)
 ```
 ### Check Model Metadata
 ```bash
 # View GGUF metadata
 ./llama-cli -m model.gguf --verbose 2>&1 | head -100
 ```
 ### Validate GGUF File
 ```python
 import struct
 def validate_gguf(filepath):
    """Check the fixed-size GGUF header of ``filepath`` and print its fields.

    Reads the 4-byte magic, then a little-endian uint32 version followed by
    two little-endian uint64 counts (tensors, metadata entries).

    Returns:
        True if the magic is ``b'GGUF'`` and the full header could be read;
        False if the magic is wrong or the file is too short to hold the
        header.
    """
    with open(filepath, 'rb') as f:
        magic = f.read(4)
        if magic != b'GGUF':
            print(f"Invalid magic: {magic}")
            return False
        # version (4 bytes) + tensor count (8) + metadata count (8)
        header = f.read(20)
        if len(header) < 20:
            # A short read would otherwise surface as struct.error
            print(f"Truncated header: expected 20 bytes after magic, got {len(header)}")
            return False
        version, tensor_count, metadata_count = struct.unpack('<IQQ', header)
        print(f"GGUF version: {version}")
        print(f"Tensors: {tensor_count}, Metadata: {metadata_count}")
        return True
 validate_gguf("model.gguf")
 ```
 ## Getting Help
 1. **GitHub Issues**: https://github.com/ggml-org/llama.cpp/issues
 2. **Discussions**: https://github.com/ggml-org/llama.cpp/discussions
 3. **Reddit**: r/LocalLLaMA
 ### Reporting Issues
 Include:
 - llama.cpp version/commit hash
 - Build command used
 - Model name and quantization
 - Full error message/stack trace
 - Hardware: CPU/GPU model, RAM, VRAM
 - OS version
 - Minimal reproduction steps
--- a/skills/mlops/grpo-rl-training/README.md
+++ b/skills/mlops/grpo-rl-training/README.md
@ -0,0 +1,97 @@
 # GRPO/RL Training Skill
 **Expert-level guidance for Group Relative Policy Optimization with TRL**
 ## 📁 Skill Structure
 ```
 grpo-rl-training/
 ├── SKILL.md                              # Main skill documentation (READ THIS FIRST)
 ├── README.md                             # This file
 ├── templates/
 │   └── basic_grpo_training.py            # Production-ready training template
 └── examples/
    └── reward_functions_library.py       # 20+ reward function examples
 ```
 ## 🚀 Quick Start
 1. **Read SKILL.md** - Comprehensive guide with all concepts and patterns
 2. **Copy `templates/basic_grpo_training.py`** - Start with working code
 3. **Browse `examples/reward_functions_library.py`** - Pick reward functions for your task
 4. **Modify for your use case** - Adapt dataset, rewards, and config
 ## 💡 What's Inside
 ### SKILL.md (Main Documentation)
 - Core GRPO concepts and algorithm fundamentals
 - Complete implementation workflow (dataset → rewards → training → deployment)
 - 10+ reward function examples with code
 - Hyperparameter tuning guide
 - Training insights (loss behavior, metrics, debugging)
 - Troubleshooting guide
 - Production best practices
 ### Templates
 - **basic_grpo_training.py**: Minimal, production-ready training script
  - Uses Qwen 2.5 1.5B Instruct
  - 3 reward functions (format + correctness)
  - LoRA for efficient training
  - Fully documented and ready to run
 ### Examples
 - **reward_functions_library.py**: 20+ battle-tested reward functions
  - Correctness rewards (exact match, fuzzy match, numeric, code execution)
  - Format rewards (XML, JSON, strict/soft)
  - Length rewards (ideal length, min/max)
  - Style rewards (reasoning quality, citations, repetition penalty)
  - Combined rewards (multi-objective optimization)
  - Preset collections for common tasks
 ## 📖 Usage for Agents
 When this skill is loaded in your agent's context:
 1. **Always read SKILL.md first** before implementing
 2. **Start simple** - Use length-based reward to validate setup
 3. **Build incrementally** - Add one reward function at a time
 4. **Reference examples** - Copy patterns from reward_functions_library.py
 5. **Monitor training** - Watch reward metrics (not loss!)
 ## 🎯 Common Use Cases
 | Task Type | Recommended Rewards | Template |
 |-----------|---------------------|----------|
 | Math reasoning | `MATH_REASONING_REWARDS` preset | basic_grpo_training.py |
 | Code generation | `CODE_GENERATION_REWARDS` preset | Modify dataset in template |
 | Summarization | `SUMMARIZATION_REWARDS` preset | Adjust prompts + rewards |
 | Q&A | `QA_REWARDS` preset | Use fuzzy match + citations |
 ## ⚠️ Critical Reminders
 - **Loss goes UP during training** - This is normal (it's KL divergence)
 - **Use 3-5 reward functions** - Single rewards often fail
 - **Test rewards before training** - Debug each function independently
 - **Monitor reward_std** - Should stay > 0.1 (avoid mode collapse)
 - **Start with num_generations=4-8** - Scale up if GPU allows
 ## 🔗 External Resources
 - [TRL Documentation](https://huggingface.co/docs/trl)
 - [DeepSeek R1 Paper](https://arxiv.org/abs/2501.12948)
 - [Open R1 Implementation](https://github.com/huggingface/open-r1)
 - [Unsloth (2-3x faster)](https://docs.unsloth.ai/)
 ## 📝 Version
 **v1.0.0** - Initial release (January 2025)
 ## 👨‍💻 Maintained By
 Orchestra Research
 For questions or improvements, see https://orchestra.com
 ---
 **License:** MIT
 **Last Updated:** January 2025
--- a/skills/mlops/grpo-rl-training/SKILL.md
+++ b/skills/mlops/grpo-rl-training/SKILL.md
@ -0,0 +1,572 @@
 ---
 name: grpo-rl-training
 description: Expert guidance for GRPO/RL fine-tuning with TRL for reasoning and task-specific model training
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Post-Training, Reinforcement Learning, GRPO, TRL, RLHF, Reward Modeling, Reasoning, DPO, PPO, Structured Output]
 dependencies: [transformers>=4.47.0, trl>=0.14.0, datasets>=3.2.0, peft>=0.14.0, torch]
 ---
 # GRPO/RL Training with TRL
 Expert-level guidance for implementing Group Relative Policy Optimization (GRPO) using the Transformer Reinforcement Learning (TRL) library. This skill provides battle-tested patterns, critical insights, and production-ready workflows for fine-tuning language models with custom reward functions.
 ## When to Use This Skill
 Use GRPO training when you need to:
 - **Enforce specific output formats** (e.g., XML tags, JSON, structured reasoning)
 - **Teach verifiable tasks** with objective correctness metrics (math, coding, fact-checking)
 - **Improve reasoning capabilities** by rewarding chain-of-thought patterns
 - **Align models to domain-specific behaviors** without labeled preference data
 - **Optimize for multiple objectives** simultaneously (format + correctness + style)
 **Do NOT use GRPO for:**
 - Simple supervised fine-tuning tasks (use SFT instead)
 - Tasks without clear reward signals
 - When you already have high-quality preference pairs (use DPO/PPO instead)
 ---
 ## Core Concepts
 ### 1. GRPO Algorithm Fundamentals
 **Key Mechanism:**
 - Generates **multiple completions** for each prompt (group size: 4-16)
 - Compares completions within each group using reward functions
 - Updates policy to favor higher-rewarded responses relative to the group
 **Critical Difference from PPO:**
 - No separate reward model needed
 - More sample-efficient (learns from within-group comparisons)
 - Simpler to implement and debug
 **Mathematical Intuition:**
 ```
 For each prompt p:
  1. Generate N completions: {c₁, c₂, ..., cₙ}
  2. Compute rewards: {r₁, r₂, ..., rₙ}
  3. Learn to increase probability of high-reward completions
     relative to low-reward ones in the same group
 ```
 ### 2. Reward Function Design Philosophy
 **Golden Rules:**
 1. **Compose multiple reward functions** - Each handles one aspect (format, correctness, style)
 2. **Scale rewards appropriately** - Higher weight = stronger signal
 3. **Use incremental rewards** - Partial credit for partial compliance
 4. **Test rewards independently** - Debug each reward function in isolation
 **Reward Function Types:**
 | Type | Use Case | Example Weight |
 |------|----------|----------------|
 | **Correctness** | Verifiable tasks (math, code) | 2.0 (highest) |
 | **Format** | Strict structure enforcement | 0.5-1.0 |
 | **Length** | Encourage verbosity/conciseness | 0.1-0.5 |
 | **Style** | Penalize unwanted patterns | -0.5 to 0.5 |
 ---
 ## Implementation Workflow
 ### Step 1: Dataset Preparation
 **Critical Requirements:**
 - Prompts in chat format (list of dicts with 'role' and 'content')
 - Include system prompts to set expectations
 - For verifiable tasks, include ground truth answers as additional columns
 **Example Structure:**
 ```python
 from datasets import load_dataset, Dataset
 SYSTEM_PROMPT = """
 Respond in the following format:
 <reasoning>
 [Your step-by-step thinking]
 </reasoning>
 <answer>
 [Final answer]
 </answer>
 """
 def prepare_dataset(raw_data):
    """
    Convert raw records into the layout GRPO training expects.

    Each record is mapped to:
    - 'prompt': List[Dict] with role/content (system message followed by
      the record's 'question' as the user message)
    - 'answer': the result of extract_answer() on the record's 'raw_answer'
    """
    def to_grpo_record(example):
        messages = [
            {'role': 'system', 'content': SYSTEM_PROMPT},
            {'role': 'user', 'content': example['question']},
        ]
        return {
            'prompt': messages,
            'answer': extract_answer(example['raw_answer']),
        }
    return raw_data.map(to_grpo_record)
 ```
 **Pro Tips:**
 - Use one-shot or few-shot examples in system prompt for complex formats
 - Keep prompts concise (max_prompt_length: 256-512 tokens)
 - Validate data quality before training (garbage in = garbage out)
 ### Step 2: Reward Function Implementation
 **Template Structure:**
 ```python
 def reward_function_name(
    prompts,        # List[List[Dict]]: Original prompts
    completions,    # List[List[Dict]]: Model generations
    answer=None,    # Optional: Ground truth from dataset
    **kwargs        # Additional dataset columns
 ) -> list[float]:
    """
    Score every completion with compute_score().

    Returns: one float per completion, in the same order as `completions`.
    """
    # The generated text is the 'content' of the first message of each completion
    texts = [generation[0]['content'] for generation in completions]
    # One score per extracted text
    return [compute_score(text) for text in texts]
 ```
 **Example 1: Correctness Reward (Math/Coding)**
 ```python
 def correctness_reward(prompts, completions, answer, **kwargs):
    """Return 2.0 for each completion whose extracted answer equals the ground truth, else 0.0."""
    texts = [generation[0]['content'] for generation in completions]
    predictions = list(map(extract_final_answer, texts))
    rewards = []
    for predicted, truth in zip(predictions, answer):
        if predicted == truth:
            rewards.append(2.0)
        else:
            rewards.append(0.0)
    return rewards
 ```
 **Example 2: Format Reward (Structured Output)**
 ```python
 import re
 def format_reward(completions, **kwargs):
    """Return 1.0 for completions containing a <reasoning> block followed by an <answer> block, else 0.0."""
    structure = re.compile(
        r'<reasoning>.*?</reasoning>\s*<answer>.*?</answer>', re.DOTALL
    )
    texts = [generation[0]['content'] for generation in completions]
    scores = []
    for text in texts:
        if structure.search(text):
            scores.append(1.0)
        else:
            scores.append(0.0)
    return scores
 ```
 **Example 3: Incremental Format Reward (Partial Credit)**
 ```python
 def incremental_format_reward(completions, **kwargs):
    """Give 0.25 per expected tag present, minus a small penalty for trailing text."""
    expected_tags = ('<reasoning>', '</reasoning>', '<answer>', '</answer>')

    def score_text(text):
        total = 0.0
        for tag in expected_tags:
            if tag in text:
                total += 0.25
        # Only when there is exactly one closing answer tag: subtract 0.001
        # for every character that follows it (after stripping whitespace)
        if text.count('</answer>') == 1:
            trailing = text.split('</answer>')[-1].strip()
            total -= len(trailing) * 0.001
        return total

    return [score_text(generation[0]['content']) for generation in completions]
 ```
 **Critical Insight:**
 Combine 3-5 reward functions for robust training. Order matters less than diversity of signals.
 ### Step 3: Training Configuration
 **Memory-Optimized Config (Small GPU)**
 ```python
 from trl import GRPOConfig
 training_args = GRPOConfig(
    output_dir="outputs/grpo-model",
    # Learning rate
    learning_rate=5e-6,          # Lower = more stable
    adam_beta1=0.9,
    adam_beta2=0.99,
    weight_decay=0.1,
    warmup_ratio=0.1,
    lr_scheduler_type='cosine',
    # Batch settings
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # Effective batch = 4
    # GRPO-specific
    num_generations=8,            # Group size: 8-16 recommended
    max_prompt_length=256,
    max_completion_length=512,
    # Training duration
    num_train_epochs=1,
    max_steps=-1,                 # -1 = use num_train_epochs; set > 0 (e.g., 500) for fixed steps
    # Optimization
    bf16=True,                    # Faster on A100/H100
    optim="adamw_8bit",          # Memory-efficient optimizer
    max_grad_norm=0.1,
    # Logging
    logging_steps=1,
    save_steps=100,
    report_to="wandb",            # Or "none" for no logging
 )
 ```
 **High-Performance Config (Large GPU)**
 ```python
 training_args = GRPOConfig(
    output_dir="outputs/grpo-model",
    learning_rate=1e-5,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    num_generations=16,           # Larger groups = better signal
    max_prompt_length=512,
    max_completion_length=1024,
    num_train_epochs=1,
    bf16=True,
    use_vllm=True,                # Fast generation with vLLM
    logging_steps=10,
 )
 ```
 **Critical Hyperparameters:**
 | Parameter | Impact | Tuning Advice |
 |-----------|--------|---------------|
 | `num_generations` | Group size for comparison | Start with 8, increase to 16 if GPU allows |
 | `learning_rate` | Convergence speed/stability | 5e-6 (safe), 1e-5 (faster, riskier) |
 | `max_completion_length` | Output verbosity | Match your task (512 for reasoning, 256 for short answers) |
 | `gradient_accumulation_steps` | Effective batch size | Increase if GPU memory limited |
 ### Step 4: Model Setup and Training
 **Standard Setup (Transformers)**
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import LoraConfig
 from trl import GRPOTrainer
 # Load model
 model_name = "Qwen/Qwen2.5-1.5B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # 2-3x faster
    device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 tokenizer.pad_token = tokenizer.eos_token
 # Optional: LoRA for parameter-efficient training
 peft_config = LoraConfig(
    r=16,                         # Rank (higher = more capacity)
    lora_alpha=32,               # Scaling factor (typically 2*r)
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    task_type="CAUSAL_LM",
    lora_dropout=0.05,
 )
 # Initialize trainer
 trainer = GRPOTrainer(
    model=model,
    processing_class=tokenizer,
    reward_funcs=[
        incremental_format_reward,
        format_reward,
        correctness_reward,
    ],
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,      # Remove for full fine-tuning
 )
 # Train
 trainer.train()
 # Save
 trainer.save_model("final_model")
 ```
 **Unsloth Setup (2-3x Faster)**
 ```python
 from unsloth import FastLanguageModel
 model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="google/gemma-3-1b-it",
    max_seq_length=1024,
    load_in_4bit=True,
    fast_inference=True,
    max_lora_rank=32,
 )
 model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=32,
    use_gradient_checkpointing="unsloth",
 )
 # Rest is identical to standard setup
 trainer = GRPOTrainer(model=model, ...)
 trainer.train()
 ```
 ---
 ## Critical Training Insights
 ### 1. Loss Behavior (EXPECTED PATTERN)
 - **Loss starts near 0 and INCREASES during training**
 - This is CORRECT - loss measures KL divergence from initial policy
 - Model is learning (diverging from original behavior to optimize rewards)
 - Monitor reward metrics instead of loss for progress
 ### 2. Reward Tracking
 Key metrics to watch:
 - `reward`: Average across all completions
 - `reward_std`: Diversity within groups (should remain > 0)
 - `kl`: KL divergence from reference (should grow moderately)
 **Healthy Training Pattern:**
 ```
 Step   Reward    Reward_Std   KL
 100    0.5       0.3          0.02
 200    0.8       0.25         0.05
 300    1.2       0.2          0.08  ← Good progression
 400    1.5       0.15         0.12
 ```
 **Warning Signs:**
 - Reward std → 0 (model collapsing to single response)
 - KL exploding (> 0.5): policy is diverging too much; reduce LR
 - Reward stuck (reward functions too harsh or model capacity issue)
 ### 3. Common Pitfalls and Solutions
 | Problem | Symptom | Solution |
 |---------|---------|----------|
 | **Mode collapse** | All completions identical | Increase `num_generations`, add diversity penalty |
 | **No learning** | Flat rewards | Check reward function logic, increase LR |
 | **OOM errors** | GPU memory exceeded | Reduce `num_generations`, enable gradient checkpointing |
 | **Slow training** | < 1 it/s | Enable `use_vllm=True`, use Unsloth, reduce seq length |
 | **Format ignored** | Model doesn't follow structure | Increase format reward weight, add incremental rewards |
 ---
 ## Advanced Patterns
 ### 1. Multi-Stage Training
 For complex tasks, train in stages:
 ```python
 # Stage 1: Format compliance (epochs=1)
 trainer_stage1 = GRPOTrainer(
    model=model,
    reward_funcs=[incremental_format_reward, format_reward],
    ...
 )
 trainer_stage1.train()
 # Stage 2: Correctness (epochs=1)
 trainer_stage2 = GRPOTrainer(
    model=model,
    reward_funcs=[format_reward, correctness_reward],
    ...
 )
 trainer_stage2.train()
 ```
 ### 2. Adaptive Reward Scaling
 ```python
 class AdaptiveReward:
    """Wrap a reward function and scale its outputs by an adjustable weight."""
    def __init__(self, base_reward_func, initial_weight=1.0):
        self.func, self.weight = base_reward_func, initial_weight
    def __call__(self, *args, **kwargs):
        """Call the wrapped function and multiply each reward by the current weight."""
        scaled = []
        for raw in self.func(*args, **kwargs):
            scaled.append(raw * self.weight)
        return scaled
    def adjust_weight(self, success_rate):
        """Raise the weight by 20% below a 0.3 success rate, lower it by 10% above 0.8."""
        if success_rate < 0.3:
            factor = 1.2
        elif success_rate > 0.8:
            factor = 0.9
        else:
            # Between the two thresholds the weight is left untouched
            return
        self.weight *= factor
 ```
 ### 3. Custom Dataset Integration
 ```python
 def load_custom_knowledge_base(csv_path):
    """Build a GRPO dataset from a CSV with 'question' and 'expert_answer' columns.

    Example use: documentation for a school communication platform.
    """
    import pandas as pd

    def to_grpo_record(row):
        messages = [
            {'role': 'system', 'content': CUSTOM_SYSTEM_PROMPT},
            {'role': 'user', 'content': row['question']},
        ]
        return {'prompt': messages, 'answer': row['expert_answer']}

    frame = pd.read_csv(csv_path)
    return Dataset.from_pandas(frame).map(to_grpo_record)
 ```
 ---
 ## Deployment and Inference
 ### Save and Merge LoRA
 ```python
 # Merge LoRA adapters into base model
 if hasattr(trainer.model, 'merge_and_unload'):
    merged_model = trainer.model.merge_and_unload()
    merged_model.save_pretrained("production_model")
    tokenizer.save_pretrained("production_model")
 ```
 ### Inference Example
 ```python
 from transformers import pipeline
 generator = pipeline(
    "text-generation",
    model="production_model",
    tokenizer=tokenizer
 )
 result = generator(
    [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': "What is 15 + 27?"}
    ],
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9
 )
 print(result[0]['generated_text'])
 ```
 ---
 ## Best Practices Checklist
 **Before Training:**
 - [ ] Validate dataset format (prompts as List[Dict])
 - [ ] Test reward functions on sample data
 - [ ] Calculate expected max_prompt_length from data
 - [ ] Choose appropriate num_generations based on GPU memory
 - [ ] Set up logging (wandb recommended)
 **During Training:**
 - [ ] Monitor reward progression (should increase)
 - [ ] Check reward_std (should stay > 0.1)
 - [ ] Watch for OOM errors (reduce batch size if needed)
 - [ ] Sample generations every 50-100 steps
 - [ ] Validate format compliance on holdout set
 **After Training:**
 - [ ] Merge LoRA weights if using PEFT
 - [ ] Test on diverse prompts
 - [ ] Compare to baseline model
 - [ ] Document reward weights and hyperparameters
 - [ ] Save reproducibility config
 ---
 ## Troubleshooting Guide
 ### Debugging Workflow
 1. **Isolate reward functions** - Test each independently
 2. **Check data distribution** - Ensure diversity in prompts
 3. **Reduce complexity** - Start with single reward, add gradually
 4. **Monitor generations** - Print samples every N steps
 5. **Validate extraction logic** - Ensure answer parsing works
 ### Quick Fixes
 ```python
 # Debug reward function
 def debug_reward(completions, **kwargs):
    """Print the start of the first two completions and return a dummy 1.0 reward for each."""
    texts = [generation[0]['content'] for generation in completions]
    for index, text in enumerate(texts):
        if index >= 2:  # Only the first 2 are printed
            break
        print(f"Response {index}: {text[:200]}...")
    return [1.0 for _ in texts]  # Dummy rewards
 # Test without training
 trainer = GRPOTrainer(..., reward_funcs=[debug_reward])
 trainer.generate_completions(dataset[:1])  # Generate without updating
 ```
 ---
 ## References and Resources
 **Official Documentation:**
 - TRL GRPO Trainer: https://huggingface.co/docs/trl/grpo_trainer
 - DeepSeek R1 Paper: https://arxiv.org/abs/2501.12948
 - Unsloth Docs: https://docs.unsloth.ai/
 **Example Repositories:**
 - Open R1 Implementation: https://github.com/huggingface/open-r1
 - TRL Examples: https://github.com/huggingface/trl/tree/main/examples
 **Recommended Reading:**
 - Progressive Disclosure Pattern for agent instructions
 - Reward shaping in RL (Ng et al.)
 - LoRA paper (Hu et al., 2021)
 ---
 ## Usage Instructions for Agents
 When this skill is loaded:
 1. **Read this entire file** before implementing GRPO training
 2. **Start with the simplest reward function** (e.g., length-based) to validate setup
 3. **Use the templates** in `templates/` directory as starting points
 4. **Reference examples** in `examples/` for task-specific implementations
 5. **Follow the workflow** sequentially (don't skip steps)
 6. **Debug incrementally** - add one reward function at a time
 **Critical Reminders:**
 - Always use multiple reward functions (3-5 is optimal)
 - Monitor reward metrics, not loss
 - Test reward functions before training
 - Start small (num_generations=4), scale up gradually
 - Save checkpoints frequently (every 100 steps)
 This skill is designed for **expert-level implementation**. Beginners should start with supervised fine-tuning before attempting GRPO.
--- a/skills/mlops/grpo-rl-training/templates/basic_grpo_training.py
+++ b/skills/mlops/grpo-rl-training/templates/basic_grpo_training.py
@ -0,0 +1,228 @@
 """
 Basic GRPO Training Template
 =============================
 A minimal, production-ready template for GRPO training with TRL.
 Adapt this for your specific task by modifying:
 1. Dataset loading (get_dataset function)
 2. Reward functions (reward_*_func)
 3. System prompt (SYSTEM_PROMPT)
 4. Hyperparameters (GRPOConfig)
 """
 import torch
 import re
 from datasets import load_dataset, Dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import LoraConfig
 from trl import GRPOTrainer, GRPOConfig
 # ==================== CONFIGURATION ====================
 # Hub identifier handed to both AutoModelForCausalLM and AutoTokenizer.
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 # Passed to GRPOConfig as output_dir; the final model is saved under "<OUTPUT_DIR>/final".
 OUTPUT_DIR = "outputs/grpo-model"
 # Forwarded to GRPOConfig as max_prompt_length (presumably measured in tokens - confirm against TRL docs).
 MAX_PROMPT_LENGTH = 256
 # Forwarded to GRPOConfig as max_completion_length (presumably measured in tokens - confirm against TRL docs).
 MAX_COMPLETION_LENGTH = 512
 # System message prepended to every prompt by get_dataset(); it describes the
 # <reasoning>/<answer> layout that the format reward functions look for.
 SYSTEM_PROMPT = """
 Respond in the following format:
 <reasoning>
 [Your step-by-step thinking]
 </reasoning>
 <answer>
 [Final answer]
 </answer>
 """
 # ==================== DATASET ====================
 def get_dataset(split="train"):
    """
    Build the training dataset in chat format.
    Loads the 'main' configuration of 'openai/gsm8k', selects *split*, and
    maps every row to:
    - 'prompt': [system message, user message] as role/content dicts
    - 'answer': the text following the first '####' marker in the row's
      answer field, stripped, or None when the marker is absent
    """
    def to_chat_record(row):
        raw_answer = row['answer']
        if '####' in raw_answer:
            # Keep only the segment right after the first '####' marker
            truth = raw_answer.split('####')[1].strip()
        else:
            truth = None
        messages = [
            {'role': 'system', 'content': SYSTEM_PROMPT},
            {'role': 'user', 'content': row['question']},
        ]
        return {'prompt': messages, 'answer': truth}
    gsm8k = load_dataset('openai/gsm8k', 'main')
    return gsm8k[split].map(to_chat_record)
 # ==================== HELPER FUNCTIONS ====================
 def extract_xml_tag(text: str, tag: str) -> str:
    """Return the stripped content of the first ``<tag>...</tag>`` pair in *text*.
    Args:
        text: The string to search.
        tag: The literal tag name (without angle brackets). It is escaped
            before being placed in the pattern, so regex metacharacters in
            the name (e.g. '.') only match themselves.
    Returns:
        The text between the opening and closing tag with surrounding
        whitespace removed, or "" when no such pair exists.
    """
    literal_tag = re.escape(tag)
    # Non-greedy body + DOTALL: stop at the first closing tag, allow newlines inside.
    found = re.search(f'<{literal_tag}>(.*?)</{literal_tag}>', text, re.DOTALL)
    return found.group(1).strip() if found else ""
 def extract_answer(text: str) -> str:
    """Return the content of the <answer> tag in *text* (delegates to extract_xml_tag)."""
    answer_body = extract_xml_tag(text, 'answer')
    return answer_body
 # ==================== REWARD FUNCTIONS ====================
 def correctness_reward_func(prompts, completions, answer, **kwargs):
    """
    Score each completion by exact match against its ground truth.
    The text of the first message of every completion is passed through
    extract_answer(); a completion earns 2.0 when the extracted string
    equals the corresponding entry of *answer*, otherwise 0.0.
    """
    texts = [item[0]['content'] for item in completions]
    predicted = [extract_answer(body) for body in texts]
    rewards = []
    for guess, truth in zip(predicted, answer):
        if guess == truth:
            rewards.append(2.0)
        else:
            rewards.append(0.0)
    return rewards
 def format_reward_func(completions, **kwargs):
    """
    Score each completion on full format compliance.
    A completion earns 0.5 when its first message contains a
    <reasoning>...</reasoning> block followed (optionally after whitespace)
    by an <answer>...</answer> block; otherwise it earns 0.0.
    """
    texts = [item[0]['content'] for item in completions]
    layout = re.compile(r'<reasoning>.*?</reasoning>\s*<answer>.*?</answer>', re.DOTALL)
    rewards = []
    for body in texts:
        if layout.search(body):
            rewards.append(0.5)
        else:
            rewards.append(0.0)
    return rewards
 def incremental_format_reward_func(completions, **kwargs):
    """
    Give partial credit for each format tag that appears.
    Every one of the four tags (<reasoning>, </reasoning>, <answer>,
    </answer>) found in a completion's first message adds 0.125, for a
    maximum of 0.5. When </answer> is present, 0.001 is subtracted per
    character of stripped text after its last occurrence.
    """
    markers = ('<reasoning>', '</reasoning>', '<answer>', '</answer>')
    texts = [item[0]['content'] for item in completions]
    def partial_score(body):
        hits = sum(1 for marker in markers if marker in body)
        total = 0.125 * hits
        if '</answer>' in body:
            # Anything trailing the final closing tag counts against the score
            trailing = body.rpartition('</answer>')[2].strip()
            total -= len(trailing) * 0.001
        return total
    return [partial_score(body) for body in texts]
 # ==================== MODEL SETUP ====================
 def setup_model_and_tokenizer():
    """
    Load the causal LM and its tokenizer for MODEL_NAME.
    The model is requested in bfloat16 with the "flash_attention_2"
    attention implementation and device_map="auto". The tokenizer's pad
    token is set to its EOS token. Returns (model, tokenizer).
    """
    load_options = {
        "torch_dtype": torch.bfloat16,
        "attn_implementation": "flash_attention_2",
        "device_map": "auto",
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Reuse EOS as the padding token
    tok.pad_token = tok.eos_token
    return causal_lm, tok
 def get_peft_config():
    """Build the LoraConfig (rank 16, alpha 32, dropout 0.05) used for training."""
    adapted_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
    adapted_modules += ["gate_proj", "up_proj", "down_proj"]
    lora_settings = {
        "r": 16,
        "lora_alpha": 32,
        "target_modules": adapted_modules,
        "task_type": "CAUSAL_LM",
        "lora_dropout": 0.05,
    }
    return LoraConfig(**lora_settings)
 # ==================== TRAINING ====================
 def main():
    """Run the whole pipeline: load data and model, configure GRPO, train, save."""
    # --- Data ---
    print("Loading dataset...")
    train_data = get_dataset()
    print(f"Dataset size: {len(train_data)}")
    # --- Model and tokenizer ---
    print("Loading model...")
    policy, tok = setup_model_and_tokenizer()
    # --- Hyperparameters, grouped by concern ---
    optimizer_settings = dict(
        learning_rate=5e-6,
        adam_beta1=0.9,
        adam_beta2=0.99,
        weight_decay=0.1,
        warmup_ratio=0.1,
        lr_scheduler_type='cosine',
        bf16=True,
        optim="adamw_8bit",
        max_grad_norm=0.1,
    )
    batch_settings = dict(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
    )
    grpo_settings = dict(
        num_generations=8,
        max_prompt_length=MAX_PROMPT_LENGTH,
        max_completion_length=MAX_COMPLETION_LENGTH,
    )
    logging_settings = dict(
        logging_steps=1,
        save_steps=100,
        report_to="wandb",  # Change to "none" to disable logging
    )
    grpo_config = GRPOConfig(
        output_dir=OUTPUT_DIR,
        run_name="grpo-training",
        **optimizer_settings,
        **batch_settings,
        **grpo_settings,
        **logging_settings,
    )
    # --- Trainer ---
    reward_fns = [
        incremental_format_reward_func,
        format_reward_func,
        correctness_reward_func,
    ]
    grpo_trainer = GRPOTrainer(
        model=policy,
        processing_class=tok,
        reward_funcs=reward_fns,
        args=grpo_config,
        train_dataset=train_data,
        peft_config=get_peft_config(),
    )
    # --- Train and persist ---
    print("Starting training...")
    grpo_trainer.train()
    print(f"Saving model to {OUTPUT_DIR}/final")
    grpo_trainer.save_model(f"{OUTPUT_DIR}/final")
    print("Training complete!")
 # Start training only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/skills/mlops/guidance/SKILL.md
+++ b/skills/mlops/guidance/SKILL.md
@ -0,0 +1,572 @@
 ---
 name: guidance
 description: Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance - Microsoft Research's constrained generation framework
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Prompt Engineering, Guidance, Constrained Generation, Structured Output, JSON Validation, Grammar, Microsoft Research, Format Enforcement, Multi-Step Workflows]
 dependencies: [guidance, transformers]
 ---
 # Guidance: Constrained LLM Generation
 ## When to Use This Skill
 Use Guidance when you need to:
 - **Control LLM output syntax** with regex or grammars
 - **Guarantee valid JSON/XML/code** generation
 - **Reduce latency** vs traditional prompting approaches
 - **Enforce structured formats** (dates, emails, IDs, etc.)
 - **Build multi-step workflows** with Pythonic control flow
 - **Prevent invalid outputs** through grammatical constraints
 **GitHub Stars**: 18,000+ | **From**: Microsoft Research
 ## Installation
 ```bash
 # Base installation
 pip install guidance
 # With specific backends
 pip install guidance[transformers]  # Hugging Face models
 pip install guidance[llama_cpp]     # llama.cpp models
 ```
 ## Quick Start
 ### Basic Example: Structured Generation
 ```python
 from guidance import models, gen
 # Load model (supports OpenAI, Transformers, llama.cpp)
 lm = models.OpenAI("gpt-4")
 # Generate with constraints
 result = lm + "The capital of France is " + gen("capital", max_tokens=5)
 print(result["capital"])  # "Paris"
 ```
 ### With Anthropic Claude
 ```python
 from guidance import models, gen, system, user, assistant
 # Configure Claude
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Use context managers for chat format
 with system():
    lm += "You are a helpful assistant."
 with user():
    lm += "What is the capital of France?"
 with assistant():
    lm += gen(max_tokens=20)
 ```
 ## Core Concepts
 ### 1. Context Managers
 Guidance uses Pythonic context managers for chat-style interactions.
 ```python
 from guidance import system, user, assistant, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # System message
 with system():
    lm += "You are a JSON generation expert."
 # User message
 with user():
    lm += "Generate a person object with name and age."
 # Assistant response
 with assistant():
    lm += gen("response", max_tokens=100)
 print(lm["response"])
 ```
 **Benefits:**
 - Natural chat flow
 - Clear role separation
 - Easy to read and maintain
 ### 2. Constrained Generation
 Guidance ensures outputs match specified patterns using regex or grammars.
 #### Regex Constraints
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Constrain to valid email format
 lm += "Email: " + gen("email", regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
 # Constrain to date format (YYYY-MM-DD)
 lm += "Date: " + gen("date", regex=r"\d{4}-\d{2}-\d{2}")
 # Constrain to phone number
 lm += "Phone: " + gen("phone", regex=r"\d{3}-\d{3}-\d{4}")
 print(lm["email"])  # Guaranteed valid email
 print(lm["date"])   # Guaranteed YYYY-MM-DD format
 ```
 **How it works:**
 - Regex converted to grammar at token level
 - Invalid tokens filtered during generation
 - Model can only produce matching outputs
 #### Selection Constraints
 ```python
 from guidance import models, gen, select
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Constrain to specific choices
 lm += "Sentiment: " + select(["positive", "negative", "neutral"], name="sentiment")
 # Multiple-choice selection
 lm += "Best answer: " + select(
    ["A) Paris", "B) London", "C) Berlin", "D) Madrid"],
    name="answer"
 )
 print(lm["sentiment"])  # One of: positive, negative, neutral
 print(lm["answer"])     # One of the full option strings, e.g. "A) Paris"
 ```
 ### 3. Token Healing
 Guidance automatically "heals" token boundaries between prompt and generation.
 **Problem:** Tokenization creates unnatural boundaries.
 ```python
 # Without token healing
 prompt = "The capital of France is "
 # Last token: " is "
 # First generated token might be " Par" (with leading space)
 # Result: "The capital of France is  Paris" (double space!)
 ```
 **Solution:** Guidance backs up one token and regenerates.
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Token healing enabled by default
 lm += "The capital of France is " + gen("capital", max_tokens=5)
 # Result: "The capital of France is Paris" (correct spacing)
 ```
 **Benefits:**
 - Natural text boundaries
 - No awkward spacing issues
 - Better model performance (sees natural token sequences)
 ### 4. Grammar-Based Generation
 Define complex structures using context-free grammars.
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # JSON grammar (simplified)
 json_grammar = """
 {
    "name": <gen name regex="[A-Za-z ]+" max_tokens=20>,
    "age": <gen age regex="[0-9]+" max_tokens=3>,
    "email": <gen email regex="[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}" max_tokens=50>
 }
 """
 # Generate valid JSON
 lm += gen("person", grammar=json_grammar)
 print(lm["person"])  # Guaranteed valid JSON structure
 ```
 **Use cases:**
 - Complex structured outputs
 - Nested data structures
 - Programming language syntax
 - Domain-specific languages
 ### 5. Guidance Functions
 Create reusable generation patterns with the `@guidance` decorator.
 ```python
 from guidance import guidance, gen, models
@guidance
 def generate_person(lm):
    """Generate a person with name and age."""
    lm += "Name: " + gen("name", max_tokens=20, stop="\n")
    lm += "\nAge: " + gen("age", regex=r"[0-9]+", max_tokens=3)
    return lm
 # Use the function
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = generate_person(lm)
 print(lm["name"])
 print(lm["age"])
 ```
 **Stateful Functions:**
 ```python
@guidance(stateless=False)
 def react_agent(lm, question, tools, max_rounds=5):
    """ReAct agent with tool use."""
    lm += f"Question: {question}\n\n"
    for i in range(max_rounds):
        # Thought
        lm += f"Thought {i+1}: " + gen("thought", stop="\n")
        # Action
        lm += "\nAction: " + select(list(tools.keys()), name="action")
        # Execute tool
        tool_result = tools[lm["action"]]()
        lm += f"\nObservation: {tool_result}\n\n"
        # Check if done
        lm += "Done? " + select(["Yes", "No"], name="done")
        if lm["done"] == "Yes":
            break
    # Final answer
    lm += "\nFinal Answer: " + gen("answer", max_tokens=100)
    return lm
 ```
 ## Backend Configuration
 ### Anthropic Claude
 ```python
 from guidance import models
 lm = models.Anthropic(
    model="claude-sonnet-4-5-20250929",
    api_key="your-api-key"  # Or set ANTHROPIC_API_KEY env var
 )
 ```
 ### OpenAI
 ```python
 lm = models.OpenAI(
    model="gpt-4o-mini",
    api_key="your-api-key"  # Or set OPENAI_API_KEY env var
 )
 ```
 ### Local Models (Transformers)
 ```python
 from guidance.models import Transformers
 lm = Transformers(
    "microsoft/Phi-4-mini-instruct",
    device="cuda"  # Or "cpu"
 )
 ```
 ### Local Models (llama.cpp)
 ```python
 from guidance.models import LlamaCpp
 lm = LlamaCpp(
    model_path="/path/to/model.gguf",
    n_ctx=4096,
    n_gpu_layers=35
 )
 ```
 ## Common Patterns
 ### Pattern 1: JSON Generation
 ```python
 from guidance import models, gen, system, user, assistant
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 with system():
    lm += "You generate valid JSON."
 with user():
    lm += "Generate a user profile with name, age, and email."
 with assistant():
    lm += """{
    "name": """ + gen("name", regex=r'"[A-Za-z ]+"', max_tokens=30) + """,
    "age": """ + gen("age", regex=r"[0-9]+", max_tokens=3) + """,
    "email": """ + gen("email", regex=r'"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"', max_tokens=50) + """
 }"""
 print(lm)  # Valid JSON guaranteed
 ```
 ### Pattern 2: Classification
 ```python
 from guidance import models, gen, select
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 text = "This product is amazing! I love it."
 lm += f"Text: {text}\n"
 lm += "Sentiment: " + select(["positive", "negative", "neutral"], name="sentiment")
 lm += "\nConfidence: " + gen("confidence", regex=r"[0-9]+", max_tokens=3) + "%"
 print(f"Sentiment: {lm['sentiment']}")
 print(f"Confidence: {lm['confidence']}%")
 ```
 ### Pattern 3: Multi-Step Reasoning
 ```python
 from guidance import models, gen, guidance
@guidance
 def chain_of_thought(lm, question):
    """Generate answer with step-by-step reasoning."""
    lm += f"Question: {question}\n\n"
    # Generate multiple reasoning steps
    for i in range(3):
        lm += f"Step {i+1}: " + gen(f"step_{i+1}", stop="\n", max_tokens=100) + "\n"
    # Final answer
    lm += "\nTherefore, the answer is: " + gen("answer", max_tokens=50)
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = chain_of_thought(lm, "What is 15% of 200?")
 print(lm["answer"])
 ```
 ### Pattern 4: ReAct Agent
 ```python
 from guidance import models, gen, select, guidance
@guidance(stateless=False)
 def react_agent(lm, question):
    """ReAct agent with tool use."""
    tools = {
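         # WARNING: eval() on model-generated text can execute arbitrary code.
         # Demo only - use a restricted expression parser in real applications.
         "calculator": lambda expr: eval(expr),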
        "search": lambda query: f"Search results for: {query}",
    }
    lm += f"Question: {question}\n\n"
    for round in range(5):
        # Thought
        lm += f"Thought: " + gen("thought", stop="\n") + "\n"
        # Action selection
        lm += "Action: " + select(["calculator", "search", "answer"], name="action")
        if lm["action"] == "answer":
            lm += "\nFinal Answer: " + gen("answer", max_tokens=100)
            break
        # Action input
        lm += "\nAction Input: " + gen("action_input", stop="\n") + "\n"
        # Execute tool
        if lm["action"] in tools:
            result = tools[lm["action"]](lm["action_input"])
            lm += f"Observation: {result}\n\n"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = react_agent(lm, "What is 25 * 4 + 10?")
 print(lm["answer"])
 ```
 ### Pattern 5: Data Extraction
 ```python
 from guidance import models, gen, guidance
@guidance
 def extract_entities(lm, text):
    """Extract structured entities from text."""
    lm += f"Text: {text}\n\n"
    # Extract person
    lm += "Person: " + gen("person", stop="\n", max_tokens=30) + "\n"
    # Extract organization
    lm += "Organization: " + gen("organization", stop="\n", max_tokens=30) + "\n"
    # Extract date
    lm += "Date: " + gen("date", regex=r"\d{4}-\d{2}-\d{2}", max_tokens=10) + "\n"
    # Extract location
    lm += "Location: " + gen("location", stop="\n", max_tokens=30) + "\n"
    return lm
 text = "Tim Cook announced at Apple Park on 2024-09-15 in Cupertino."
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = extract_entities(lm, text)
 print(f"Person: {lm['person']}")
 print(f"Organization: {lm['organization']}")
 print(f"Date: {lm['date']}")
 print(f"Location: {lm['location']}")
 ```
 ## Best Practices
 ### 1. Use Regex for Format Validation
 ```python
 # ✅ Good: Regex ensures valid format
 lm += "Email: " + gen("email", regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
 # ❌ Bad: Free generation may produce invalid emails
 lm += "Email: " + gen("email", max_tokens=50)
 ```
 ### 2. Use select() for Fixed Categories
 ```python
 # ✅ Good: Guaranteed valid category
 lm += "Status: " + select(["pending", "approved", "rejected"], name="status")
 # ❌ Bad: May generate typos or invalid values
 lm += "Status: " + gen("status", max_tokens=20)
 ```
 ### 3. Leverage Token Healing
 ```python
 # Token healing is enabled by default
 # No special action needed - just concatenate naturally
 lm += "The capital is " + gen("capital")  # Automatic healing
 ```
 ### 4. Use stop Sequences
 ```python
 # ✅ Good: Stop at newline for single-line outputs
 lm += "Name: " + gen("name", stop="\n")
 # ❌ Bad: May generate multiple lines
 lm += "Name: " + gen("name", max_tokens=50)
 ```
 ### 5. Create Reusable Functions
 ```python
 # ✅ Good: Reusable pattern
@guidance
 def generate_person(lm):
    lm += "Name: " + gen("name", stop="\n")
    lm += "\nAge: " + gen("age", regex=r"[0-9]+")
    return lm
 # Use multiple times
 lm = generate_person(lm)
 lm += "\n\n"
 lm = generate_person(lm)
 ```
 ### 6. Balance Constraints
 ```python
 # ✅ Good: Reasonable constraints
 lm += gen("name", regex=r"[A-Za-z ]+", max_tokens=30)
 # ❌ Too strict: May fail or be very slow
 lm += gen("name", regex=r"^(John|Jane)$", max_tokens=10)
 ```
 ## Comparison to Alternatives
 | Feature | Guidance | Instructor | Outlines | LMQL |
 |---------|----------|------------|----------|------|
 | Regex Constraints | ✅ Yes | ❌ No | ✅ Yes | ✅ Yes |
 | Grammar Support | ✅ CFG | ❌ No | ✅ CFG | ✅ CFG |
 | Pydantic Validation | ❌ No | ✅ Yes | ✅ Yes | ❌ No |
 | Token Healing | ✅ Yes | ❌ No | ✅ Yes | ❌ No |
 | Local Models | ✅ Yes | ⚠️ Limited | ✅ Yes | ✅ Yes |
 | API Models | ✅ Yes | ✅ Yes | ⚠️ Limited | ✅ Yes |
 | Pythonic Syntax | ✅ Yes | ✅ Yes | ✅ Yes | ❌ SQL-like |
 | Learning Curve | Low | Low | Medium | High |
 **When to choose Guidance:**
 - Need regex/grammar constraints
 - Want token healing
 - Building complex workflows with control flow
 - Using local models (Transformers, llama.cpp)
 - Prefer Pythonic syntax
 **When to choose alternatives:**
 - Instructor: Need Pydantic validation with automatic retrying
 - Outlines: Need JSON schema validation
 - LMQL: Prefer declarative query syntax
 ## Performance Characteristics
 **Latency Reduction:**
 - 30-50% faster than traditional prompting for constrained outputs
 - Token healing reduces unnecessary regeneration
 - Grammar constraints prevent invalid token generation
 **Memory Usage:**
 - Minimal overhead vs unconstrained generation
 - Grammar compilation cached after first use
 - Efficient token filtering at inference time
 **Token Efficiency:**
 - Prevents wasted tokens on invalid outputs
 - No need for retry loops
 - Direct path to valid outputs
 ## Resources
 - **Documentation**: https://guidance.readthedocs.io
 - **GitHub**: https://github.com/guidance-ai/guidance (18k+ stars)
 - **Notebooks**: https://github.com/guidance-ai/guidance/tree/main/notebooks
 - **Discord**: Community support available
 ## See Also
 - `references/constraints.md` - Comprehensive regex and grammar patterns
 - `references/backends.md` - Backend-specific configuration
 - `references/examples.md` - Production-ready examples
--- a/skills/mlops/guidance/references/backends.md
+++ b/skills/mlops/guidance/references/backends.md
@ -0,0 +1,554 @@
 # Backend Configuration Guide
 Complete guide to configuring Guidance with different LLM backends.
 ## Table of Contents
 - API-Based Models (Anthropic, OpenAI)
 - Local Models (Transformers, llama.cpp)
 - Backend Comparison
 - Performance Tuning
 - Advanced Configuration
 ## API-Based Models
 ### Anthropic Claude
 #### Basic Setup
 ```python
 from guidance import models
 # Using environment variable
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Reads ANTHROPIC_API_KEY from environment
 # Explicit API key
 lm = models.Anthropic(
    model="claude-sonnet-4-5-20250929",
    api_key="your-api-key-here"
 )
 ```
 #### Available Models
 ```python
 # Claude Sonnet 4.5 (Latest, recommended)
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Claude 3.7 Sonnet (Fast, cost-effective)
 lm = models.Anthropic("claude-3-7-sonnet-20250219")
 # Claude 3 Opus (Most capable)
 lm = models.Anthropic("claude-3-opus-20240229")
 # Claude 3.5 Haiku (Fastest, cheapest)
 lm = models.Anthropic("claude-3-5-haiku-20241022")
 ```
 #### Configuration Options
 ```python
 lm = models.Anthropic(
    model="claude-sonnet-4-5-20250929",
    api_key="your-api-key",
    max_tokens=4096,           # Max tokens to generate
    temperature=0.7,            # Sampling temperature (0-1)
    top_p=0.9,                  # Nucleus sampling
    timeout=30,                 # Request timeout (seconds)
    max_retries=3              # Retry failed requests
 )
 ```
 #### With Context Managers
 ```python
 from guidance import models, system, user, assistant, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 with system():
    lm += "You are a helpful assistant."
 with user():
    lm += "What is the capital of France?"
 with assistant():
    lm += gen(max_tokens=50)
 print(lm)
 ```
 ### OpenAI
 #### Basic Setup
 ```python
 from guidance import models
 # Using environment variable
 lm = models.OpenAI("gpt-4o")
 # Reads OPENAI_API_KEY from environment
 # Explicit API key
 lm = models.OpenAI(
    model="gpt-4o",
    api_key="your-api-key-here"
 )
 ```
 #### Available Models
 ```python
 # GPT-4o (Latest, multimodal)
 lm = models.OpenAI("gpt-4o")
 # GPT-4o Mini (Fast, cost-effective)
 lm = models.OpenAI("gpt-4o-mini")
 # GPT-4 Turbo
 lm = models.OpenAI("gpt-4-turbo")
 # GPT-3.5 Turbo (Cheapest)
 lm = models.OpenAI("gpt-3.5-turbo")
 ```
 #### Configuration Options
 ```python
 lm = models.OpenAI(
    model="gpt-4o-mini",
    api_key="your-api-key",
    max_tokens=2048,
    temperature=0.7,
    top_p=1.0,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    timeout=30
 )
 ```
 #### Chat Format
 ```python
 from guidance import models, gen
 lm = models.OpenAI("gpt-4o-mini")
 # OpenAI uses chat format
 lm += [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2+2?"}
 ]
 # Generate response
 lm += gen(max_tokens=50)
 ```
 ### Azure OpenAI
 ```python
 from guidance import models
 lm = models.AzureOpenAI(
    model="gpt-4o",
    azure_endpoint="https://your-resource.openai.azure.com/",
    api_key="your-azure-api-key",
    api_version="2024-02-15-preview",
    deployment_name="your-deployment-name"
 )
 ```
 ## Local Models
 ### Transformers (Hugging Face)
 #### Basic Setup
 ```python
 from guidance.models import Transformers
 # Load model from Hugging Face
 lm = Transformers("microsoft/Phi-4-mini-instruct")
 ```
 #### GPU Configuration
 ```python
 # Use GPU
 lm = Transformers(
    "microsoft/Phi-4-mini-instruct",
    device="cuda"
 )
 # Use specific GPU
 lm = Transformers(
    "microsoft/Phi-4-mini-instruct",
    device="cuda:0"  # GPU 0
 )
 # Use CPU
 lm = Transformers(
    "microsoft/Phi-4-mini-instruct",
    device="cpu"
 )
 ```
 #### Advanced Configuration
 ```python
 lm = Transformers(
    "microsoft/Phi-4-mini-instruct",
    device="cuda",
    torch_dtype="float16",      # Use FP16 (faster, less memory)
    load_in_8bit=True,          # 8-bit quantization
    max_memory={0: "20GB"},     # GPU memory limit
    offload_folder="./offload"  # Offload to disk if needed
 )
 ```
 #### Popular Models
 ```python
 # Phi-4 (Microsoft)
 lm = Transformers("microsoft/Phi-4-mini-instruct")
 lm = Transformers("microsoft/Phi-3-medium-4k-instruct")
 # Llama 3 (Meta)
 lm = Transformers("meta-llama/Llama-3.1-8B-Instruct")
 lm = Transformers("meta-llama/Llama-3.1-70B-Instruct")
 # Mistral (Mistral AI)
 lm = Transformers("mistralai/Mistral-7B-Instruct-v0.3")
 lm = Transformers("mistralai/Mixtral-8x7B-Instruct-v0.1")
 # Qwen (Alibaba)
 lm = Transformers("Qwen/Qwen2.5-7B-Instruct")
 # Gemma (Google)
 lm = Transformers("google/gemma-2-9b-it")
 ```
 #### Generation Configuration
 ```python
 lm = Transformers(
    "microsoft/Phi-4-mini-instruct",
    device="cuda"
 )
 # Configure generation
 from guidance import gen
 result = lm + gen(
    max_tokens=100,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.1
 )
 ```
 ### llama.cpp
 #### Basic Setup
 ```python
 from guidance.models import LlamaCpp
 # Load GGUF model
 lm = LlamaCpp(
    model_path="/path/to/model.gguf",
    n_ctx=4096  # Context window
 )
 ```
 #### GPU Configuration
 ```python
 # Use GPU acceleration
 lm = LlamaCpp(
    model_path="/path/to/model.gguf",
    n_ctx=4096,
    n_gpu_layers=35,  # Offload 35 layers to GPU
    n_threads=8       # CPU threads for remaining layers
 )
 # Full GPU offload
 lm = LlamaCpp(
    model_path="/path/to/model.gguf",
    n_ctx=4096,
    n_gpu_layers=-1  # Offload all layers
 )
 ```
 #### Advanced Configuration
 ```python
 lm = LlamaCpp(
    model_path="/path/to/llama-3.1-8b-instruct.Q4_K_M.gguf",
    n_ctx=8192,          # Context window (tokens)
    n_gpu_layers=35,     # GPU layers
    n_threads=8,         # CPU threads
    n_batch=512,         # Batch size for prompt processing
    use_mmap=True,       # Memory-map the model file
    use_mlock=False,     # Lock model in RAM
    seed=42,             # Random seed
    verbose=False        # Suppress verbose output
 )
 ```
 #### Quantized Models
 ```python
 # Q4_K_M (4-bit, recommended for most cases)
 lm = LlamaCpp("/path/to/model.Q4_K_M.gguf")
 # Q5_K_M (5-bit, better quality)
 lm = LlamaCpp("/path/to/model.Q5_K_M.gguf")
 # Q8_0 (8-bit, high quality)
 lm = LlamaCpp("/path/to/model.Q8_0.gguf")
 # F16 (16-bit float, highest quality)
 lm = LlamaCpp("/path/to/model.F16.gguf")
 ```
 #### Popular GGUF Models
 ```python
 # Llama 3.1
 lm = LlamaCpp("llama-3.1-8b-instruct.Q4_K_M.gguf")
 # Mistral
 lm = LlamaCpp("mistral-7b-instruct-v0.3.Q4_K_M.gguf")
 # Phi-4
 lm = LlamaCpp("phi-4-mini-instruct.Q4_K_M.gguf")
 ```
 ## Backend Comparison
 ### Feature Matrix
 | Feature | Anthropic | OpenAI | Transformers | llama.cpp |
 |---------|-----------|--------|--------------|-----------|
 | Constrained Generation | ✅ Full | ✅ Full | ✅ Full | ✅ Full |
 | Token Healing | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
 | Streaming | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
 | GPU Support | N/A | N/A | ✅ Yes | ✅ Yes |
 | Quantization | N/A | N/A | ✅ Yes | ✅ Yes |
 | Cost | $$$ | $$$ | Free | Free |
 | Latency | Low | Low | Medium | Low |
 | Setup Difficulty | Easy | Easy | Medium | Medium |
 ### Performance Characteristics
 **Anthropic Claude:**
 - **Latency**: 200-500ms (API call)
 - **Throughput**: Limited by API rate limits
 - **Cost**: $3-15 per 1M input tokens
 - **Best for**: Production systems, high-quality outputs
 **OpenAI:**
 - **Latency**: 200-400ms (API call)
 - **Throughput**: Limited by API rate limits
 - **Cost**: $0.15-30 per 1M input tokens
 - **Best for**: Cost-sensitive production (e.g., with gpt-4o-mini)
 **Transformers:**
 - **Latency**: 50-200ms (local inference)
 - **Throughput**: GPU-dependent (10-100 tokens/sec)
 - **Cost**: Hardware cost only
 - **Best for**: Privacy-sensitive, high-volume, experimentation
 **llama.cpp:**
 - **Latency**: 30-150ms (local inference)
 - **Throughput**: Hardware-dependent (20-150 tokens/sec)
 - **Cost**: Hardware cost only
 - **Best for**: Edge deployment, Apple Silicon, CPU inference
 ### Memory Requirements
 **Transformers (FP16):**
 - 7B model: ~14GB GPU VRAM
 - 13B model: ~26GB GPU VRAM
 - 70B model: ~140GB GPU VRAM (multi-GPU)
 **llama.cpp (Q4_K_M):**
 - 7B model: ~4.5GB RAM
 - 13B model: ~8GB RAM
 - 70B model: ~40GB RAM
 **Optimization Tips:**
 - Use quantized models (Q4_K_M) for lower memory
 - Use GPU offloading for faster inference
 - Use CPU inference for smaller models (<7B)
 ## Performance Tuning
 ### API Models (Anthropic, OpenAI)
 #### Reduce Latency
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Use lower max_tokens (faster response)
 lm += gen(max_tokens=100)  # Instead of 1000
 # Use streaming (perceived latency reduction)
 for chunk in lm.stream(gen(max_tokens=500)):
    print(chunk, end="", flush=True)
 ```
 #### Reduce Cost
 ```python
 # Use cheaper models
 lm = models.Anthropic("claude-3-5-haiku-20241022")  # vs Sonnet
 lm = models.OpenAI("gpt-4o-mini")  # vs gpt-4o
 # Reduce context size
 # - Keep prompts concise
 # - Avoid large few-shot examples
 # - Use max_tokens limits
 ```
 ### Local Models (Transformers, llama.cpp)
 #### Optimize GPU Usage
 ```python
 from guidance.models import Transformers
 # Use FP16 for 2x speedup
 lm = Transformers(
    "meta-llama/Llama-3.1-8B-Instruct",
    device="cuda",
    torch_dtype="float16"
 )
 # Use 8-bit quantization for 4x memory reduction
 lm = Transformers(
    "meta-llama/Llama-3.1-8B-Instruct",
    device="cuda",
    load_in_8bit=True
 )
 # Use flash attention (requires flash-attn package)
 lm = Transformers(
    "meta-llama/Llama-3.1-8B-Instruct",
    device="cuda",
    use_flash_attention_2=True
 )
 ```
 #### Optimize llama.cpp
 ```python
 from guidance.models import LlamaCpp
 # Maximize GPU layers
 lm = LlamaCpp(
    model_path="/path/to/model.Q4_K_M.gguf",
    n_gpu_layers=-1  # All layers on GPU
 )
 # Optimize batch size
 lm = LlamaCpp(
    model_path="/path/to/model.Q4_K_M.gguf",
    n_batch=512,     # Larger batch = faster prompt processing
    n_gpu_layers=-1
 )
 # Use Metal (Apple Silicon)
 lm = LlamaCpp(
    model_path="/path/to/model.Q4_K_M.gguf",
    n_gpu_layers=-1,  # Use Metal GPU acceleration
    use_mmap=True
 )
 ```
 #### Batch Processing
 ```python
 # Process multiple requests efficiently
 requests = [
    "What is 2+2?",
    "What is the capital of France?",
    "What is photosynthesis?"
 ]
 # Bad: Reloads the model for every request
 for req in requests:
    lm = Transformers("microsoft/Phi-4-mini-instruct")
    lm += req + gen(max_tokens=50)
 # Good: Reuse loaded model
 lm = Transformers("microsoft/Phi-4-mini-instruct")
 for req in requests:
    lm += req + gen(max_tokens=50)
 ```
 ## Advanced Configuration
 ### Custom Model Configurations
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from guidance.models import Transformers
 # Load custom model
 tokenizer = AutoTokenizer.from_pretrained("your-model")
 model = AutoModelForCausalLM.from_pretrained(
    "your-model",
    device_map="auto",
    torch_dtype="float16"
 )
 # Use with Guidance
 lm = Transformers(model=model, tokenizer=tokenizer)
 ```
 ### Environment Variables
 ```bash
 # API keys
 export ANTHROPIC_API_KEY="sk-ant-..."
 export OPENAI_API_KEY="sk-..."
 # Transformers cache
 export HF_HOME="/path/to/cache"
 export TRANSFORMERS_CACHE="/path/to/cache"
 # GPU selection
 export CUDA_VISIBLE_DEVICES=0,1  # Use GPU 0 and 1
 ```
 ### Debugging
 ```python
 # Enable verbose logging
 import logging
 logging.basicConfig(level=logging.DEBUG)
 # Check backend info
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 print(f"Model: {lm.model_name}")
 print(f"Backend: {lm.backend}")
 # Check GPU usage (Transformers)
 lm = Transformers("microsoft/Phi-4-mini-instruct", device="cuda")
 print(f"Device: {lm.device}")
 print(f"Memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
 ```
 ## Resources
 - **Anthropic Docs**: https://docs.anthropic.com
 - **OpenAI Docs**: https://platform.openai.com/docs
 - **Hugging Face Models**: https://huggingface.co/models
 - **llama.cpp**: https://github.com/ggerganov/llama.cpp
 - **GGUF Models**: https://huggingface.co/models?library=gguf
--- a/skills/mlops/guidance/references/constraints.md
+++ b/skills/mlops/guidance/references/constraints.md
@ -0,0 +1,674 @@
 # Comprehensive Constraint Patterns
 Guide to regex constraints, grammar-based generation, and token healing in Guidance.
 ## Table of Contents
 - Regex Constraints
 - Grammar-Based Generation
 - Token Healing
 - Selection Constraints
 - Complex Patterns
 - Performance Optimization
 ## Regex Constraints
 ### Basic Patterns
 #### Numeric Constraints
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Integer (positive)
 lm += "Age: " + gen("age", regex=r"[0-9]+")
 # Integer (with negatives)
 lm += "Temperature: " + gen("temp", regex=r"-?[0-9]+")
 # Float (positive)
 lm += "Price: $" + gen("price", regex=r"[0-9]+\.[0-9]{2}")
 # Float (with negatives and optional decimals)
 lm += "Value: " + gen("value", regex=r"-?[0-9]+(\.[0-9]+)?")
 # Percentage (0-100)
 lm += "Progress: " + gen("progress", regex=r"(100|[0-9]{1,2})")
 # Range (1-5 stars)
 lm += "Rating: " + gen("rating", regex=r"[1-5]") + " stars"
 ```
 #### Text Constraints
 ```python
 # Alphabetic only
 lm += "Name: " + gen("name", regex=r"[A-Za-z]+")
 # Alphabetic with spaces
 lm += "Full Name: " + gen("full_name", regex=r"[A-Za-z ]+")
 # Alphanumeric
 lm += "Username: " + gen("username", regex=r"[A-Za-z0-9_]+")
 # Capitalized words
 lm += "Title: " + gen("title", regex=r"[A-Z][a-z]+( [A-Z][a-z]+)*")
 # Lowercase only
 lm += "Code: " + gen("code", regex=r"[a-z0-9-]+")
 # Specific length
 lm += "ID: " + gen("id", regex=r"[A-Z]{3}-[0-9]{6}")  # e.g., "ABC-123456"
 ```
 #### Date and Time Constraints
 ```python
 # Date (YYYY-MM-DD)
 lm += "Date: " + gen("date", regex=r"\d{4}-\d{2}-\d{2}")
 # Date (MM/DD/YYYY)
 lm += "Date: " + gen("date_us", regex=r"\d{2}/\d{2}/\d{4}")
 # Time (HH:MM)
 lm += "Time: " + gen("time", regex=r"\d{2}:\d{2}")
 # Time (HH:MM:SS)
 lm += "Time: " + gen("time_full", regex=r"\d{2}:\d{2}:\d{2}")
 # ISO 8601 datetime
 lm += "Timestamp: " + gen(
    "timestamp",
    regex=r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"
 )
 # Year (YYYY)
 lm += "Year: " + gen("year", regex=r"(19|20)\d{2}")
 # Month name
 lm += "Month: " + gen(
    "month",
    regex=r"(January|February|March|April|May|June|July|August|September|October|November|December)"
 )
 ```
 #### Contact Information
 ```python
 # Email
 lm += "Email: " + gen(
    "email",
    regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
 )
 # Phone (US format)
 lm += "Phone: " + gen("phone", regex=r"\d{3}-\d{3}-\d{4}")
 # Phone (international format)
 lm += "Phone: " + gen("phone_intl", regex=r"\+[0-9]{1,3}-[0-9]{1,14}")
 # ZIP code (US)
 lm += "ZIP: " + gen("zip", regex=r"\d{5}(-\d{4})?")
 # Postal code (Canada)
 lm += "Postal: " + gen("postal", regex=r"[A-Z]\d[A-Z] \d[A-Z]\d")
 # URL
 lm += "URL: " + gen(
    "url",
    regex=r"https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/[a-zA-Z0-9._~:/?#\[\]@!$&'()*+,;=-]*)?"
 )
 ```
 ### Advanced Patterns
 #### JSON Field Constraints
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # String field with quotes
 lm += '"name": ' + gen("name", regex=r'"[A-Za-z ]+"')
 # Numeric field (no quotes)
 lm += '"age": ' + gen("age", regex=r"[0-9]+")
 # Boolean field
 lm += '"active": ' + gen("active", regex=r"(true|false)")
 # Null field
 lm += '"optional": ' + gen("optional", regex=r"(null|[0-9]+)")
 # Array of strings
 lm += '"tags": [' + gen(
    "tags",
    regex=r'"[a-z]+"(, "[a-z]+")*'
 ) + ']'
 # Complete JSON object
 lm += """{
    "name": """ + gen("name", regex=r'"[A-Za-z ]+"') + """,
    "age": """ + gen("age", regex=r"[0-9]+") + """,
    "email": """ + gen(
        "email",
        regex=r'"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"'
    ) + """
 }"""
 ```
 #### Code Patterns
 ```python
 # Python variable name
 lm += "Variable: " + gen("var", regex=r"[a-z_][a-z0-9_]*")
 # Python function name
 lm += "Function: " + gen("func", regex=r"[a-z_][a-z0-9_]*")
 # Hex color code
 lm += "Color: #" + gen("color", regex=r"[0-9A-Fa-f]{6}")
 # UUID
 lm += "UUID: " + gen(
    "uuid",
    regex=r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
 )
 # Git commit hash (short)
 lm += "Commit: " + gen("commit", regex=r"[0-9a-f]{7}")
 # Semantic version
 lm += "Version: " + gen("version", regex=r"[0-9]+\.[0-9]+\.[0-9]+")
 # IP address (IPv4)
 lm += "IP: " + gen(
    "ip",
    regex=r"((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
 )
 ```
 #### Domain-Specific Patterns
 ```python
 # Credit card number
 lm += "Card: " + gen("card", regex=r"\d{4}-\d{4}-\d{4}-\d{4}")
 # Social Security Number (US)
 lm += "SSN: " + gen("ssn", regex=r"\d{3}-\d{2}-\d{4}")
 # ISBN-13
 lm += "ISBN: " + gen("isbn", regex=r"978-\d{1,5}-\d{1,7}-\d{1,7}-\d")
 # License plate (US)
 lm += "Plate: " + gen("plate", regex=r"[A-Z]{3}-\d{4}")
 # Currency amount
 lm += "Amount: $" + gen("amount", regex=r"[0-9]{1,3}(,[0-9]{3})*\.[0-9]{2}")
 # Percentage with decimal
 lm += "Rate: " + gen("rate", regex=r"[0-9]+\.[0-9]{1,2}%")
 ```
 ## Grammar-Based Generation
 ### JSON Grammar
 ```python
 from guidance import models, gen, guidance
@guidance
 def json_object(lm):
    """Generate valid JSON object."""
    lm += "{\n"
    # Name field (required)
    lm += '    "name": ' + gen("name", regex=r'"[A-Za-z ]+"') + ",\n"
    # Age field (required)
    lm += '    "age": ' + gen("age", regex=r"[0-9]+") + ",\n"
    # Email field (required)
    lm += '    "email": ' + gen(
        "email",
        regex=r'"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"'
    ) + ",\n"
    # Active field (required, boolean)
    lm += '    "active": ' + gen("active", regex=r"(true|false)") + "\n"
    lm += "}"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = json_object(lm)
 print(lm)  # Valid JSON guaranteed
 ```
 ### Nested JSON Grammar
 ```python
@guidance
 def nested_json(lm):
    """Generate nested JSON structure."""
    lm += "{\n"
    # User object
    lm += '    "user": {\n'
    lm += '        "name": ' + gen("name", regex=r'"[A-Za-z ]+"') + ",\n"
    lm += '        "age": ' + gen("age", regex=r"[0-9]+") + "\n"
    lm += "    },\n"
    # Address object
    lm += '    "address": {\n'
    lm += '        "street": ' + gen("street", regex=r'"[A-Za-z0-9 ]+"') + ",\n"
    lm += '        "city": ' + gen("city", regex=r'"[A-Za-z ]+"') + ",\n"
    lm += '        "zip": ' + gen("zip", regex=r'"\d{5}"') + "\n"
    lm += "    }\n"
    lm += "}"
    return lm
 ```
 ### Array Grammar
 ```python
@guidance
 def json_array(lm, count=3):
    """Generate JSON array with fixed count."""
    lm += "[\n"
    for i in range(count):
        lm += "    {\n"
        lm += '        "id": ' + gen(f"id_{i}", regex=r"[0-9]+") + ",\n"
        lm += '        "name": ' + gen(f"name_{i}", regex=r'"[A-Za-z ]+"') + "\n"
        lm += "    }"
        if i < count - 1:
            lm += ","
        lm += "\n"
    lm += "]"
    return lm
 ```
 ### XML Grammar
 ```python
@guidance
 def xml_document(lm):
    """Generate valid XML document."""
    lm += '<?xml version="1.0"?>\n'
    lm += "<person>\n"
    # Name element
    lm += "    <name>" + gen("name", regex=r"[A-Za-z ]+") + "</name>\n"
    # Age element
    lm += "    <age>" + gen("age", regex=r"[0-9]+") + "</age>\n"
    # Email element
    lm += "    <email>" + gen(
        "email",
        regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ) + "</email>\n"
    lm += "</person>"
    return lm
 ```
 ### CSV Grammar
 ```python
@guidance
 def csv_row(lm):
    """Generate CSV row."""
    lm += gen("name", regex=r"[A-Za-z ]+") + ","
    lm += gen("age", regex=r"[0-9]+") + ","
    lm += gen("email", regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    return lm
@guidance
 def csv_document(lm, rows=5):
    """Generate complete CSV."""
    # Header
    lm += "Name,Age,Email\n"
    # Rows
    for i in range(rows):
        lm = csv_row(lm)
        if i < rows - 1:
            lm += "\n"
    return lm
 ```
 ## Token Healing
 ### How Token Healing Works
 **Problem:** Tokenization creates unnatural boundaries.
 ```python
 # Example without token healing
 prompt = "The capital of France is "
 # Tokenization: ["The", " capital", " of", " France", " is", " "]
 # Model sees last token: " "
 # First generated token might include leading space: " Paris"
 # Result: "The capital of France is  Paris" (double space)
 ```
 **Solution:** Guidance backs up and regenerates the last token.
 ```python
 from guidance import models, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Token healing enabled by default
 lm += "The capital of France is " + gen("capital", max_tokens=5)
 # Process:
 # 1. Back up to token before " is "
 # 2. Regenerate " is" + "capital" together
 # 3. Result: "The capital of France is Paris" (correct)
 ```
 ### Token Healing Examples
 #### Natural Continuations
 ```python
 # Without token healing
 lm += "The function name is get" + gen("rest")
 # Might generate: "The function name is get User" (space before User)
 # With token healing
 lm += "The function name is get" + gen("rest")
 # Generates: "The function name is getUser" (correct camelCase)
 ```
 #### Code Generation
 ```python
 # Function name completion
 lm += "def calculate_" + gen("rest", stop="(")
 # Token healing ensures smooth connection: "calculate_total"
 # Variable name completion
 lm += "my_" + gen("var_name", regex=r"[a-z_]+")
 # Token healing ensures: "my_variable_name" (not "my_ variable_name")
 ```
 #### Domain-Specific Terms
 ```python
 # Medical terms
 lm += "The patient has hyper" + gen("condition")
 # Token healing helps: "hypertension" (not "hyper tension")
 # Technical terms
 lm += "Using micro" + gen("tech")
 # Token healing helps: "microservices" (not "micro services")
 ```
 ### Disabling Token Healing
 ```python
 # Disable token healing if needed (rare)
 lm += gen("text", token_healing=False)
 ```
 ## Selection Constraints
 ### Basic Selection
 ```python
 from guidance import models, select
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 # Simple selection
 lm += "Status: " + select(["active", "inactive", "pending"], name="status")
 # Boolean selection
 lm += "Approved: " + select(["Yes", "No"], name="approved")
 # Multiple choice
 lm += "Answer: " + select(
    ["A) Paris", "B) London", "C) Berlin", "D) Madrid"],
    name="answer"
 )
 ```
 ### Conditional Selection
 ```python
 from guidance import models, select, gen, guidance
@guidance
 def conditional_fields(lm):
    """Generate fields conditionally based on type."""
    lm += "Type: " + select(["person", "company"], name="type")
    if lm["type"] == "person":
        lm += "\nName: " + gen("name", regex=r"[A-Za-z ]+")
        lm += "\nAge: " + gen("age", regex=r"[0-9]+")
    else:
        lm += "\nCompany Name: " + gen("company", regex=r"[A-Za-z ]+")
        lm += "\nEmployees: " + gen("employees", regex=r"[0-9]+")
    return lm
 ```
 ### Repeated Selection
 ```python
@guidance
 def multiple_selections(lm):
    """Select multiple items."""
    lm += "Select 3 colors:\n"
    colors = ["red", "blue", "green", "yellow", "purple"]
    for i in range(3):
        lm += f"{i+1}. " + select(colors, name=f"color_{i}") + "\n"
    return lm
 ```
 ## Complex Patterns
 ### Pattern 1: Structured Forms
 ```python
@guidance
 def user_form(lm):
    """Generate structured user form."""
    lm += "=== User Registration ===\n\n"
    # Name (alphabetic only)
    lm += "Full Name: " + gen("name", regex=r"[A-Za-z ]+", stop="\n") + "\n"
    # Age (numeric)
    lm += "Age: " + gen("age", regex=r"[0-9]+", max_tokens=3) + "\n"
    # Email (validated format)
    lm += "Email: " + gen(
        "email",
        regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        stop="\n"
    ) + "\n"
    # Phone (US format)
    lm += "Phone: " + gen("phone", regex=r"\d{3}-\d{3}-\d{4}") + "\n"
    # Account type (selection)
    lm += "Account Type: " + select(
        ["Standard", "Premium", "Enterprise"],
        name="account_type"
    ) + "\n"
    # Active status (boolean)
    lm += "Active: " + select(["Yes", "No"], name="active") + "\n"
    return lm
 ```
 ### Pattern 2: Multi-Entity Extraction
 ```python
@guidance
 def extract_entities(lm, text):
    """Extract multiple entities with constraints."""
    lm += f"Text: {text}\n\n"
    # Person name (alphabetic)
    lm += "Person: " + gen("person", regex=r"[A-Za-z ]+", stop="\n") + "\n"
    # Organization (alphanumeric with spaces)
    lm += "Organization: " + gen(
        "organization",
        regex=r"[A-Za-z0-9 ]+",
        stop="\n"
    ) + "\n"
    # Date (YYYY-MM-DD format)
    lm += "Date: " + gen("date", regex=r"\d{4}-\d{2}-\d{2}") + "\n"
    # Location (alphabetic with spaces)
    lm += "Location: " + gen("location", regex=r"[A-Za-z ]+", stop="\n") + "\n"
    # Amount (currency)
    lm += "Amount: $" + gen("amount", regex=r"[0-9,]+\.[0-9]{2}") + "\n"
    return lm
 ```
 ### Pattern 3: Code Generation
 ```python
@guidance
 def generate_python_function(lm):
    """Generate Python function with constraints."""
    # Function name (valid Python identifier)
    lm += "def " + gen("func_name", regex=r"[a-z_][a-z0-9_]*") + "("
    # Parameter name
    lm += gen("param", regex=r"[a-z_][a-z0-9_]*") + "):\n"
    # Docstring
    lm += '    """' + gen("docstring", stop='"""', max_tokens=50) + '"""\n'
    # Function body (a single return expression; free-form text, not syntax-checked)
    lm += "    return " + gen("return_value", stop="\n") + "\n"
    return lm
 ```
 ### Pattern 4: Hierarchical Data
 ```python
@guidance
 def org_chart(lm):
    """Generate organizational chart."""
    lm += "Company: " + gen("company", regex=r"[A-Za-z ]+") + "\n\n"
    # CEO
    lm += "CEO: " + gen("ceo", regex=r"[A-Za-z ]+") + "\n"
    # Departments
    for dept in ["Engineering", "Sales", "Marketing"]:
        lm += f"\n{dept} Department:\n"
        lm += "  Head: " + gen(f"{dept.lower()}_head", regex=r"[A-Za-z ]+") + "\n"
        lm += "  Size: " + gen(f"{dept.lower()}_size", regex=r"[0-9]+") + " employees\n"
    return lm
 ```
 ## Performance Optimization
 ### Best Practices
 #### 1. Use Specific Patterns
 ```python
 # ✅ Good: Specific pattern
 lm += gen("age", regex=r"[0-9]{1,3}")  # Fast
 # ❌ Bad: Overly broad pattern
 lm += gen("age", regex=r"[0-9]+")  # Slower
 ```
 #### 2. Limit Max Tokens
 ```python
 # ✅ Good: Reasonable limit
 lm += gen("name", max_tokens=30)
 # ❌ Bad: No limit
 lm += gen("name")  # May generate forever
 ```
 #### 3. Use stop Sequences
 ```python
 # ✅ Good: Stop at newline
 lm += gen("line", stop="\n")
 # ❌ Bad: Rely on max_tokens
 lm += gen("line", max_tokens=100)
 ```
 #### 4. Cache Compiled Grammars
 ```python
 # Grammars are cached automatically after first use
 # No manual caching needed
@guidance
 def reusable_pattern(lm):
    """This grammar is compiled once and cached."""
    lm += gen("email", regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    return lm
 # First call: compiles grammar
 lm = reusable_pattern(lm)
 # Subsequent calls: uses cached grammar (fast)
 lm = reusable_pattern(lm)
 ```
 #### 5. Avoid Overlapping Constraints
 ```python
 # ✅ Good: Clear constraints
 lm += gen("age", regex=r"[0-9]+", max_tokens=3)
 # ❌ Bad: Redundant constraints
 lm += gen("age", regex=r"[0-9]{2}", max_tokens=10)  # max_tokens unnecessary
 ```
 ### Performance Benchmarks
 **Regex vs Free Generation:**
 - Simple regex (digits): ~1.2x slower than free gen
 - Complex regex (email): ~1.5x slower than free gen
 - Grammar-based: ~2x slower than free gen
 **But:**
 - 100% valid outputs (vs ~70% with free gen + validation)
 - No retry loops needed
 - Overall faster end-to-end for structured outputs
 **Optimization Tips:**
 - Use regex for critical fields only
 - Use `select()` for small fixed sets (fastest)
 - Use `stop` sequences when possible (faster than max_tokens)
 - Cache compiled grammars by reusing functions
 ## Resources
 - **Token Healing Paper**: https://arxiv.org/abs/2306.17648
 - **Guidance Docs**: https://guidance.readthedocs.io
 - **GitHub**: https://github.com/guidance-ai/guidance
--- a/skills/mlops/guidance/references/examples.md
+++ b/skills/mlops/guidance/references/examples.md
@ -0,0 +1,767 @@
 # Production-Ready Examples
 Real-world examples of using Guidance for structured generation, agents, and workflows.
 ## Table of Contents
 - JSON Generation
 - Data Extraction
 - Classification Systems
 - Agent Systems
 - Multi-Step Workflows
 - Code Generation
 - Production Tips
 ## JSON Generation
 ### Basic JSON
 ```python
 from guidance import models, gen, guidance
@guidance
 def generate_user(lm):
    """Generate valid user JSON."""
    lm += "{\n"
    lm += '  "name": ' + gen("name", regex=r'"[A-Za-z ]+"') + ",\n"
    lm += '  "age": ' + gen("age", regex=r"[0-9]+") + ",\n"
    lm += '  "email": ' + gen(
        "email",
        regex=r'"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"'
    ) + "\n"
    lm += "}"
    return lm
 # Use it
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm += "Generate a user profile:\n"
 lm = generate_user(lm)
 print(lm)
 # Output: Valid JSON guaranteed
 ```
 ### Nested JSON
 ```python
@guidance
 def generate_order(lm):
    """Generate nested order JSON."""
    lm += "{\n"
    # Customer info
    lm += '  "customer": {\n'
    lm += '    "name": ' + gen("customer_name", regex=r'"[A-Za-z ]+"') + ",\n"
    lm += '    "email": ' + gen(
        "customer_email",
        regex=r'"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"'
    ) + "\n"
    lm += "  },\n"
    # Order details
    lm += '  "order": {\n'
    lm += '    "id": ' + gen("order_id", regex=r'"ORD-[0-9]{6}"') + ",\n"
    lm += '    "date": ' + gen("order_date", regex=r'"\d{4}-\d{2}-\d{2}"') + ",\n"
    lm += '    "total": ' + gen("order_total", regex=r"[0-9]+\.[0-9]{2}") + "\n"
    lm += "  },\n"
    # Status
    lm += '  "status": ' + gen(
        "status",
        regex=r'"(pending|processing|shipped|delivered)"'
    ) + "\n"
    lm += "}"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = generate_order(lm)
 ```
 ### JSON Array
 ```python
@guidance
 def generate_user_list(lm, count=3):
    """Generate JSON array of users."""
    lm += "[\n"
    for i in range(count):
        lm += "  {\n"
        lm += '    "id": ' + gen(f"id_{i}", regex=r"[0-9]+") + ",\n"
        lm += '    "name": ' + gen(f"name_{i}", regex=r'"[A-Za-z ]+"') + ",\n"
        lm += '    "active": ' + gen(f"active_{i}", regex=r"(true|false)") + "\n"
        lm += "  }"
        if i < count - 1:
            lm += ","
        lm += "\n"
    lm += "]"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = generate_user_list(lm, count=5)
 ```
 ### Dynamic JSON Schema
 ```python
 import json
 from guidance import models, gen, guidance
@guidance
 def json_from_schema(lm, schema):
    """Generate JSON matching a schema."""
    lm += "{\n"
    fields = list(schema["properties"].items())
    for i, (field_name, field_schema) in enumerate(fields):
        lm += f'  "{field_name}": '
        # Handle different types
        if field_schema["type"] == "string":
            if "pattern" in field_schema:
                lm += gen(field_name, regex=f'"{field_schema["pattern"]}"')
            else:
                lm += gen(field_name, regex=r'"[^"]+"')
        elif field_schema["type"] == "number":
            lm += gen(field_name, regex=r"[0-9]+(\.[0-9]+)?")
        elif field_schema["type"] == "integer":
            lm += gen(field_name, regex=r"[0-9]+")
        elif field_schema["type"] == "boolean":
            lm += gen(field_name, regex=r"(true|false)")
        if i < len(fields) - 1:
            lm += ","
        lm += "\n"
    lm += "}"
    return lm
 # Define schema
 schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "score": {"type": "number"},
        "active": {"type": "boolean"}
    }
 }
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = json_from_schema(lm, schema)
 ```
 ## Data Extraction
 ### Extract from Text
 ```python
 from guidance import models, gen, guidance, system, user, assistant
@guidance
 def extract_person_info(lm, text):
    """Extract structured info from text."""
    lm += f"Text: {text}\n\n"
    with assistant():
        lm += "Name: " + gen("name", regex=r"[A-Za-z ]+", stop="\n") + "\n"
        lm += "Age: " + gen("age", regex=r"[0-9]+", max_tokens=3) + "\n"
        lm += "Occupation: " + gen("occupation", regex=r"[A-Za-z ]+", stop="\n") + "\n"
        lm += "Email: " + gen(
            "email",
            regex=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            stop="\n"
        ) + "\n"
    return lm
 text = "John Smith is a 35-year-old software engineer. Contact: john@example.com"
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 with system():
    lm += "You extract structured information from text."
 with user():
    lm = extract_person_info(lm, text)
 print(f"Name: {lm['name']}")
 print(f"Age: {lm['age']}")
 print(f"Occupation: {lm['occupation']}")
 print(f"Email: {lm['email']}")
 ```
 ### Multi-Entity Extraction
 ```python
@guidance
 def extract_entities(lm, text):
    """Extract multiple entity types."""
    lm += f"Analyze: {text}\n\n"
    # Person entities
    lm += "People:\n"
    for i in range(3):  # Up to 3 people
        lm += f"- " + gen(f"person_{i}", regex=r"[A-Za-z ]+", stop="\n") + "\n"
    # Organization entities
    lm += "\nOrganizations:\n"
    for i in range(2):  # Up to 2 orgs
        lm += f"- " + gen(f"org_{i}", regex=r"[A-Za-z0-9 ]+", stop="\n") + "\n"
    # Dates
    lm += "\nDates:\n"
    for i in range(2):  # Up to 2 dates
        lm += f"- " + gen(f"date_{i}", regex=r"\d{4}-\d{2}-\d{2}", stop="\n") + "\n"
    # Locations
    lm += "\nLocations:\n"
    for i in range(2):  # Up to 2 locations
        lm += f"- " + gen(f"location_{i}", regex=r"[A-Za-z ]+", stop="\n") + "\n"
    return lm
 text = """
 Tim Cook and Satya Nadella met at Microsoft headquarters in Redmond on 2024-09-15
 to discuss the collaboration between Apple and Microsoft. The meeting continued
 in Cupertino on 2024-09-20.
 """
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = extract_entities(lm, text)
 ```
 ### Batch Extraction
 ```python
@guidance
 def batch_extract(lm, texts):
    """Extract from multiple texts."""
    lm += "Batch Extraction Results:\n\n"
    for i, text in enumerate(texts):
        lm += f"=== Item {i+1} ===\n"
        lm += f"Text: {text}\n"
        lm += "Name: " + gen(f"name_{i}", regex=r"[A-Za-z ]+", stop="\n") + "\n"
        lm += "Sentiment: " + gen(
            f"sentiment_{i}",
            regex=r"(positive|negative|neutral)",
            stop="\n"
        ) + "\n\n"
    return lm
 texts = [
    "Alice is happy with the product",
    "Bob is disappointed with the service",
    "Carol has no strong feelings either way"
 ]
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = batch_extract(lm, texts)
 ```
 ## Classification Systems
 ### Sentiment Analysis
 ```python
 from guidance import models, select, gen
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 text = "This product is absolutely amazing! Best purchase ever."
 lm += f"Text: {text}\n\n"
 lm += "Sentiment: " + select(
    ["positive", "negative", "neutral"],
    name="sentiment"
 )
 lm += "\nConfidence: " + gen("confidence", regex=r"[0-9]{1,3}") + "%\n"
 lm += "Reasoning: " + gen("reasoning", stop="\n", max_tokens=50)
 print(f"Sentiment: {lm['sentiment']}")
 print(f"Confidence: {lm['confidence']}%")
 print(f"Reasoning: {lm['reasoning']}")
 ```
 ### Multi-Label Classification
 ```python
@guidance
 def classify_article(lm, text):
    """Classify article with multiple labels."""
    lm += f"Article: {text}\n\n"
    # Primary category
    lm += "Primary Category: " + select(
        ["Technology", "Business", "Science", "Politics", "Entertainment"],
        name="primary_category"
    ) + "\n"
    # Secondary categories (up to 3)
    lm += "\nSecondary Categories:\n"
    categories = ["Technology", "Business", "Science", "Politics", "Entertainment"]
    for i in range(3):
        lm += f"{i+1}. " + select(categories, name=f"secondary_{i}") + "\n"
    # Tags
    lm += "\nTags: " + gen("tags", stop="\n", max_tokens=50) + "\n"
    # Target audience
    lm += "Target Audience: " + select(
        ["General", "Expert", "Beginner"],
        name="audience"
    )
    return lm
 article = """
 Apple announced new AI features in iOS 18, leveraging machine learning to improve
 battery life and performance. The company's stock rose 5% following the announcement.
 """
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = classify_article(lm, article)
 ```
 ### Intent Classification
 ```python
@guidance
 def classify_intent(lm, message):
    """Classify user intent."""
    lm += f"User Message: {message}\n\n"
    # Intent
    lm += "Intent: " + select(
        ["question", "complaint", "request", "feedback", "other"],
        name="intent"
    ) + "\n"
    # Urgency
    lm += "Urgency: " + select(
        ["low", "medium", "high", "critical"],
        name="urgency"
    ) + "\n"
    # Department
    lm += "Route To: " + select(
        ["support", "sales", "billing", "technical"],
        name="department"
    ) + "\n"
    # Sentiment
    lm += "Sentiment: " + select(
        ["positive", "neutral", "negative"],
        name="sentiment"
    )
    return lm
 message = "My account was charged twice for the same order. Need help ASAP!"
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = classify_intent(lm, message)
 print(f"Intent: {lm['intent']}")
 print(f"Urgency: {lm['urgency']}")
 print(f"Department: {lm['department']}")
 ```
 ## Agent Systems
 ### ReAct Agent
 ```python
 from guidance import models, gen, select, guidance
@guidance(stateless=False)
 def react_agent(lm, question, tools, max_rounds=5):
    """ReAct agent with tool use."""
    lm += f"Question: {question}\n\n"
    for round in range(max_rounds):
        # Thought
        lm += f"Thought {round+1}: " + gen("thought", stop="\n", max_tokens=100) + "\n"
        # Action selection
        lm += "Action: " + select(
            list(tools.keys()) + ["answer"],
            name="action"
        )
        if lm["action"] == "answer":
            lm += "\n\nFinal Answer: " + gen("answer", max_tokens=200)
            break
        # Action input
        lm += "\nAction Input: " + gen("action_input", stop="\n", max_tokens=100) + "\n"
        # Execute tool
        if lm["action"] in tools:
            try:
                result = tools[lm["action"]](lm["action_input"])
                lm += f"Observation: {result}\n\n"
            except Exception as e:
                lm += f"Observation: Error - {str(e)}\n\n"
    return lm
 # Define tools
 # Each tool receives the model-generated "Action Input" string from react_agent.
 tools = {
    # WARNING: eval() executes arbitrary Python produced by the model.
    # Demonstration only - use a restricted arithmetic parser for untrusted input.
    "calculator": lambda expr: eval(expr),
    "search": lambda query: f"Search results for '{query}': [Mock results]",
    "weather": lambda city: f"Weather in {city}: Sunny, 72°F"
 }
 # Use agent
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = react_agent(lm, "What is (25 * 4) + 10?", tools)
 print(lm["answer"])
 ```
 ### Multi-Agent System
 ```python
@guidance
 def coordinator_agent(lm, task):
    """Coordinator that delegates to specialists."""
    lm += f"Task: {task}\n\n"
    # Determine which specialist to use
    lm += "Specialist: " + select(
        ["researcher", "writer", "coder", "analyst"],
        name="specialist"
    ) + "\n"
    lm += "Reasoning: " + gen("reasoning", stop="\n", max_tokens=100) + "\n"
    return lm
@guidance
 def researcher_agent(lm, query):
    """Research specialist."""
    lm += f"Research Query: {query}\n\n"
    lm += "Findings:\n"
    for i in range(3):
        lm += f"{i+1}. " + gen(f"finding_{i}", stop="\n", max_tokens=100) + "\n"
    return lm
@guidance
 def writer_agent(lm, topic):
    """Writing specialist."""
    lm += f"Topic: {topic}\n\n"
    lm += "Title: " + gen("title", stop="\n", max_tokens=50) + "\n"
    lm += "Content:\n" + gen("content", max_tokens=500)
    return lm
 # Coordination workflow
 task = "Write an article about AI safety"
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = coordinator_agent(lm, task)
 specialist = lm["specialist"]
 if specialist == "researcher":
    lm = researcher_agent(lm, task)
 elif specialist == "writer":
    lm = writer_agent(lm, task)
 ```
 ### Tool Use with Validation
 ```python
@guidance(stateless=False)
 def validated_tool_agent(lm, question):
    """Agent with validated tool calls."""
    tools = {
        "add": lambda a, b: float(a) + float(b),
        "multiply": lambda a, b: float(a) * float(b),
        "divide": lambda a, b: float(a) / float(b) if float(b) != 0 else "Error: Division by zero"
    }
    lm += f"Question: {question}\n\n"
    for i in range(5):
        # Select tool
        lm += "Tool: " + select(list(tools.keys()) + ["done"], name="tool")
        if lm["tool"] == "done":
            lm += "\nAnswer: " + gen("answer", max_tokens=100)
            break
        # Get validated numeric arguments
        lm += "\nArg1: " + gen("arg1", regex=r"-?[0-9]+(\.[0-9]+)?") + "\n"
        lm += "Arg2: " + gen("arg2", regex=r"-?[0-9]+(\.[0-9]+)?") + "\n"
        # Execute
        result = tools[lm["tool"]](lm["arg1"], lm["arg2"])
        lm += f"Result: {result}\n\n"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = validated_tool_agent(lm, "What is (10 + 5) * 3?")
 ```
 ## Multi-Step Workflows
 ### Chain of Thought
 ```python
@guidance
 def chain_of_thought(lm, question):
    """Multi-step reasoning with CoT."""
    lm += f"Question: {question}\n\n"
    # Generate reasoning steps
    lm += "Let me think step by step:\n\n"
    for i in range(4):
        lm += f"Step {i+1}: " + gen(f"step_{i+1}", stop="\n", max_tokens=100) + "\n"
    # Final answer
    lm += "\nTherefore, the answer is: " + gen("answer", stop="\n", max_tokens=50)
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = chain_of_thought(lm, "If a train travels 60 mph for 2.5 hours, how far does it go?")
 print(lm["answer"])
 ```
 ### Self-Consistency
 ```python
@guidance
 def self_consistency(lm, question, num_samples=3):
    """Generate multiple reasoning paths and aggregate."""
    lm += f"Question: {question}\n\n"
    answers = []
    for i in range(num_samples):
        lm += f"=== Attempt {i+1} ===\n"
        lm += "Reasoning: " + gen(f"reasoning_{i}", stop="\n", max_tokens=100) + "\n"
        lm += "Answer: " + gen(f"answer_{i}", stop="\n", max_tokens=50) + "\n\n"
        answers.append(lm[f"answer_{i}"])
    # Aggregate (simple majority vote)
    from collections import Counter
    most_common = Counter(answers).most_common(1)[0][0]
    lm += f"Final Answer (by majority): {most_common}\n"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = self_consistency(lm, "What is 15% of 200?")
 ```
 ### Planning and Execution
 ```python
@guidance
 def plan_and_execute(lm, goal):
    """Plan tasks then execute them."""
    lm += f"Goal: {goal}\n\n"
    # Planning phase
    lm += "Plan:\n"
    num_steps = 4
    for i in range(num_steps):
        lm += f"{i+1}. " + gen(f"plan_step_{i}", stop="\n", max_tokens=100) + "\n"
    # Execution phase
    lm += "\nExecution:\n\n"
    for i in range(num_steps):
        lm += f"Step {i+1}: {lm[f'plan_step_{i}']}\n"
        lm += "Status: " + select(["completed", "in-progress", "blocked"], name=f"status_{i}") + "\n"
        lm += "Result: " + gen(f"result_{i}", stop="\n", max_tokens=150) + "\n\n"
    # Summary
    lm += "Summary: " + gen("summary", max_tokens=200)
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = plan_and_execute(lm, "Build a REST API for a blog platform")
 ```
 ## Code Generation
 ### Python Function
 ```python
@guidance
 def generate_python_function(lm, description):
    """Generate a small Python function from a natural-language description.

    The prompt is built as a template: literal Python syntax is appended
    verbatim and only the name, parameters, docstring and body are generated.

    Args:
        lm: Guidance model state to append to.
        description: Text describing what the function should do.

    Returns:
        The model state with captures ``func_name``, ``params``,
        ``docstring`` and ``body``.
    """
    lm += f"Description: {description}\n\n"
    # Function signature: the name is constrained to a lowercase identifier
    lm += "def " + gen("func_name", regex=r"[a-z_][a-z0-9_]*") + "("
    # Parameters: one or more lowercase identifiers separated by ", "
    # (the pattern has no empty alternative, so at least one is required)
    lm += gen("params", regex=r"[a-z_][a-z0-9_]*(, [a-z_][a-z0-9_]*)*") + "):\n"
    # Docstring: generation stops at the closing triple quote
    lm += '    """' + gen("docstring", stop='"""', max_tokens=100) + '"""\n'
    # Function body: stop="\n" limits the body to a single line
    lm += "    " + gen("body", stop="\n", max_tokens=200) + "\n"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = generate_python_function(lm, "Check if a number is prime")
 print(lm)
 ```
 ### SQL Query
 ```python
@guidance
 def generate_sql(lm, description):
    """Generate a ``SELECT ... FROM ... WHERE ...;`` query from a description.

    Args:
        lm: Guidance model state to append to.
        description: Text describing the data to retrieve.

    Returns:
        The model state with captures ``select_clause``, ``from_clause``
        and ``where_clause``.
    """
    lm += f"Description: {description}\n\n"
    lm += "SQL Query:\n"
    # SELECT clause: generation stops when the model reaches " FROM"
    lm += "SELECT " + gen("select_clause", stop=" FROM", max_tokens=100)
    # FROM clause: generation stops when the model reaches " WHERE"
    lm += " FROM " + gen("from_clause", stop=" WHERE", max_tokens=50)
    # WHERE clause: always emitted — this template has no branch that omits it
    lm += " WHERE " + gen("where_clause", stop=";", max_tokens=100) + ";"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = generate_sql(lm, "Get all users who signed up in the last 30 days")
 ```
 ### API Endpoint
 ```python
@guidance
 def generate_api_endpoint(lm, description):
    """Generate a REST API endpoint sketch (method, path, bodies).

    Args:
        lm: Guidance model state to append to.
        description: Text describing what the endpoint should do.

    Returns:
        The model state with captures ``method``, ``path`` and
        ``response_data``; ``field1`` and ``field2`` are captured only
        when the selected method is POST or PUT.
    """
    lm += f"Description: {description}\n\n"
    # HTTP method: constrained to one of four verbs
    lm += "Method: " + select(["GET", "POST", "PUT", "DELETE"], name="method") + "\n"
    # Path: lowercase letters, digits, "/" and "-" only, on a single line
    lm += "Path: /" + gen("path", regex=r"[a-z0-9/-]+", stop="\n") + "\n"
    # Request body (if POST/PUT): branches on the method captured above
    if lm["method"] in ["POST", "PUT"]:
        lm += "\nRequest Body:\n"
        lm += "{\n"
        # Each field value is constrained to a quoted lowercase/underscore word
        lm += '  "field1": ' + gen("field1", regex=r'"[a-z_]+"') + ",\n"
        lm += '  "field2": ' + gen("field2", regex=r'"[a-z_]+"') + "\n"
        lm += "}\n"
    # Response: fixed envelope with a free-form "data" value
    lm += "\nResponse (200 OK):\n"
    lm += "{\n"
    lm += '  "status": "success",\n'
    lm += '  "data": ' + gen("response_data", max_tokens=100) + "\n"
    lm += "}\n"
    return lm
 lm = models.Anthropic("claude-sonnet-4-5-20250929")
 lm = generate_api_endpoint(lm, "Create a new blog post")
 ```
 ## Production Tips
 ### Error Handling
 ```python
@guidance
 def safe_extraction(lm, text):
    """Extract with fallback handling."""
    try:
        lm += f"Text: {text}\n"
        lm += "Name: " + gen("name", regex=r"[A-Za-z ]+", stop="\n", max_tokens=30)
        return lm
    except Exception as e:
        # Fallback to less strict extraction
        lm += f"Text: {text}\n"
        lm += "Name: " + gen("name", stop="\n", max_tokens=30)
        return lm
 ```
 ### Caching
 ```python
 from functools import lru_cache
@lru_cache(maxsize=100)
 def cached_generation(text):
    """Return the sentiment label for ``text``, memoised per distinct input.

    The cache keeps results for up to 100 inputs; a repeated ``text``
    returns the stored label without running the body again.

    Args:
        text: Text to analyze. It is the cache key, so it must be hashable.

    Returns:
        The value captured under ``sentiment`` — one of "positive",
        "negative" or "neutral".
    """
    # A fresh model handle is created on every cache miss
    lm = models.Anthropic("claude-sonnet-4-5-20250929")
    lm += f"Analyze: {text}\n"
    lm += "Sentiment: " + select(["positive", "negative", "neutral"], name="sentiment")
    # Return only the captured label, not the model state
    return lm["sentiment"]
 # First call: hits LLM
 result1 = cached_generation("This is great!")
 # Second call: returns cached result
 result2 = cached_generation("This is great!")  # Instant!
 ```
 ### Monitoring
 ```python
 import time
@guidance
 def monitored_generation(lm, text):
    """Track generation metrics."""
    start_time = time.time()
    lm += f"Text: {text}\n"
    lm += "Analysis: " + gen("analysis", max_tokens=100)
    elapsed = time.time() - start_time
    # Log metrics
    print(f"Generation time: {elapsed:.2f}s")
    print(f"Output length: {len(lm['analysis'])} chars")
    return lm
 ```
 ### Batch Processing
 ```python
 def batch_process(texts, batch_size=10):
    """Classify the sentiment of every text, walking the input in batches.

    Args:
        texts: Sequence of strings to classify.
        batch_size: Number of texts handled per outer iteration.

    Returns:
        List of sentiment labels ("positive", "negative" or "neutral"),
        one per input text and in the same order as ``texts``.
    """
    lm = models.Anthropic("claude-sonnet-4-5-20250929")
    results = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        for offset, text in enumerate(batch):
            # Use the text's global position as the capture name; naming by
            # the batch start made every text in a batch share one capture.
            capture = f"sentiment_{start + offset}"
            lm += f"Text: {text}\n"
            lm += "Sentiment: " + select(
                ["positive", "negative", "neutral"],
                name=capture
            ) + "\n\n"
            results.append(lm[capture])
    return results
 ```
 ## Resources
 - **Guidance Notebooks**: https://github.com/guidance-ai/guidance/tree/main/notebooks
 - **Guidance Docs**: https://guidance.readthedocs.io
 - **Community Examples**: https://github.com/guidance-ai/guidance/discussions
--- a/skills/mlops/huggingface-tokenizers/SKILL.md
+++ b/skills/mlops/huggingface-tokenizers/SKILL.md
@ -0,0 +1,516 @@
 ---
 name: huggingface-tokenizers
 description: Fast tokenizers optimized for research and production. Rust-based implementation tokenizes 1GB in <20 seconds. Supports BPE, WordPiece, and Unigram algorithms. Train custom vocabularies, track alignments, handle padding/truncation. Integrates seamlessly with transformers. Use when you need high-performance tokenization or custom tokenizer training.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Tokenization, HuggingFace, BPE, WordPiece, Unigram, Fast Tokenization, Rust, Custom Tokenizer, Alignment Tracking, Production]
 dependencies: [tokenizers, transformers, datasets]
 ---
 # HuggingFace Tokenizers - Fast Tokenization for NLP
 Fast, production-ready tokenizers with Rust performance and Python ease-of-use.
 ## When to use HuggingFace Tokenizers
 **Use HuggingFace Tokenizers when:**
 - Need extremely fast tokenization (<20s per GB of text)
 - Training custom tokenizers from scratch
 - Want alignment tracking (token → original text position)
 - Building production NLP pipelines
 - Need to tokenize large corpora efficiently
 **Performance**:
 - **Speed**: <20 seconds to tokenize 1GB on CPU
 - **Implementation**: Rust core with Python/Node.js bindings
 - **Efficiency**: 10-100× faster than pure Python implementations
 **Use alternatives instead**:
 - **SentencePiece**: Language-independent, used by T5/ALBERT
 - **tiktoken**: OpenAI's BPE tokenizer for GPT models
 - **transformers AutoTokenizer**: Loading pretrained only (uses this library internally)
 ## Quick start
 ### Installation
 ```bash
 # Install tokenizers
 pip install tokenizers
 # With transformers integration
 pip install tokenizers transformers
 ```
 ### Load pretrained tokenizer
 ```python
 from tokenizers import Tokenizer
 # Load from HuggingFace Hub
 tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
 # Encode text (the pretrained BERT post-processor wraps it in [CLS] ... [SEP])
 output = tokenizer.encode("Hello, how are you?")
 print(output.tokens)  # ['[CLS]', 'hello', ',', 'how', 'are', 'you', '?', '[SEP]']
 print(output.ids)     # [101, 7592, 1010, 2129, 2024, 2017, 1029, 102]
 # Decode back (special tokens are skipped by default)
 text = tokenizer.decode(output.ids)
 print(text)  # "hello, how are you?"
 ```
 ### Train custom BPE tokenizer
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 from tokenizers.pre_tokenizers import Whitespace
 # Initialize tokenizer with BPE model
 tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
 tokenizer.pre_tokenizer = Whitespace()
 # Configure trainer
 trainer = BpeTrainer(
    vocab_size=30000,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"],
    min_frequency=2
 )
 # Train on files
 files = ["train.txt", "validation.txt"]
 tokenizer.train(files, trainer)
 # Save
 tokenizer.save("my-tokenizer.json")
 ```
 **Training time**: ~1-2 minutes for 100MB corpus, ~10-20 minutes for 1GB
 ### Batch encoding with padding
 ```python
 # Enable padding
 tokenizer.enable_padding(pad_id=3, pad_token="[PAD]")
 # Encode batch
 texts = ["Hello world", "This is a longer sentence"]
 encodings = tokenizer.encode_batch(texts)
 for encoding in encodings:
    print(encoding.ids)
 # [101, 7592, 2088, 102, 3, 3, 3]
 # [101, 2023, 2003, 1037, 2936, 6251, 102]
 ```
 ## Tokenization algorithms
 ### BPE (Byte-Pair Encoding)
 **How it works**:
 1. Start with character-level vocabulary
 2. Find most frequent character pair
 3. Merge into new token, add to vocabulary
 4. Repeat until vocabulary size reached
 **Used by**: GPT-2, GPT-3, RoBERTa, BART, DeBERTa
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 from tokenizers.pre_tokenizers import ByteLevel
 tokenizer = Tokenizer(BPE(unk_token="<|endoftext|>"))
 tokenizer.pre_tokenizer = ByteLevel()
 trainer = BpeTrainer(
    vocab_size=50257,
    special_tokens=["<|endoftext|>"],
    min_frequency=2
 )
 tokenizer.train(files=["data.txt"], trainer=trainer)
 ```
 **Advantages**:
 - Handles OOV words well (breaks into subwords)
 - Flexible vocabulary size
 - Good for morphologically rich languages
 **Trade-offs**:
 - Tokenization depends on merge order
 - May split common words unexpectedly
 ### WordPiece
 **How it works**:
 1. Start with character vocabulary
 2. Score merge pairs: `frequency(pair) / (frequency(first) × frequency(second))`
 3. Merge highest scoring pair
 4. Repeat until vocabulary size reached
 **Used by**: BERT, DistilBERT, MobileBERT
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import WordPiece
 from tokenizers.trainers import WordPieceTrainer
 from tokenizers.pre_tokenizers import Whitespace
 from tokenizers.normalizers import BertNormalizer
 tokenizer = Tokenizer(WordPiece(unk_token="[UNK]"))
 tokenizer.normalizer = BertNormalizer(lowercase=True)
 tokenizer.pre_tokenizer = Whitespace()
 trainer = WordPieceTrainer(
    vocab_size=30522,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"],
    continuing_subword_prefix="##"
 )
 tokenizer.train(files=["corpus.txt"], trainer=trainer)
 ```
 **Advantages**:
 - Prioritizes meaningful merges (high score = semantically related)
 - Used successfully in BERT (state-of-the-art results)
 **Trade-offs**:
 - Unknown words become `[UNK]` if no subword match
 - Saves vocabulary, not merge rules (larger files)
 ### Unigram
 **How it works**:
 1. Start with large vocabulary (all substrings)
 2. Compute loss for corpus with current vocabulary
 3. Remove tokens with minimal impact on loss
 4. Repeat until vocabulary size reached
 **Used by**: ALBERT, T5, mBART, XLNet (via SentencePiece)
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import Unigram
 from tokenizers.trainers import UnigramTrainer
 tokenizer = Tokenizer(Unigram())
 trainer = UnigramTrainer(
    vocab_size=8000,
    special_tokens=["<unk>", "<s>", "</s>"],
    unk_token="<unk>"
 )
 tokenizer.train(files=["data.txt"], trainer=trainer)
 ```
 **Advantages**:
 - Probabilistic (finds most likely tokenization)
 - Works well for languages without word boundaries
 - Handles diverse linguistic contexts
 **Trade-offs**:
 - Computationally expensive to train
 - More hyperparameters to tune
 ## Tokenization pipeline
 Complete pipeline: **Normalization → Pre-tokenization → Model → Post-processing**
 ### Normalization
 Clean and standardize text:
 ```python
 from tokenizers.normalizers import NFD, StripAccents, Lowercase, Sequence
 tokenizer.normalizer = Sequence([
    NFD(),           # Unicode normalization (decompose)
    Lowercase(),     # Convert to lowercase
    StripAccents()   # Remove accents
 ])
 # Input: "Héllo WORLD"
 # After normalization: "hello world"
 ```
 **Common normalizers**:
 - `NFD`, `NFC`, `NFKD`, `NFKC` - Unicode normalization forms
 - `Lowercase()` - Convert to lowercase
 - `StripAccents()` - Remove accents (é → e)
 - `Strip()` - Remove whitespace
 - `Replace(pattern, content)` - Regex replacement
 ### Pre-tokenization
 Split text into word-like units:
 ```python
 from tokenizers.pre_tokenizers import Whitespace, Punctuation, Sequence, ByteLevel
 # Split on whitespace and punctuation
 tokenizer.pre_tokenizer = Sequence([
    Whitespace(),
    Punctuation()
 ])
 # Input: "Hello, world!"
 # After pre-tokenization: ["Hello", ",", "world", "!"]
 ```
 **Common pre-tokenizers**:
 - `Whitespace()` - Split on spaces, tabs, newlines
 - `ByteLevel()` - GPT-2 style byte-level splitting
 - `Punctuation()` - Isolate punctuation
 - `Digits(individual_digits=True)` - Split digits individually
 - `Metaspace()` - Replace spaces with ▁ (SentencePiece style)
 ### Post-processing
 Add special tokens for model input:
 ```python
 from tokenizers.processors import TemplateProcessing
 # BERT-style: [CLS] sentence [SEP]
 tokenizer.post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B [SEP]",
    special_tokens=[
        ("[CLS]", 1),
        ("[SEP]", 2),
    ],
 )
 ```
 **Common patterns**:
 ```python
 # GPT-2: sentence <|endoftext|>
 TemplateProcessing(
    single="$A <|endoftext|>",
    special_tokens=[("<|endoftext|>", 50256)]
 )
 # RoBERTa: <s> sentence </s>
 TemplateProcessing(
    single="<s> $A </s>",
    pair="<s> $A </s> </s> $B </s>",
    special_tokens=[("<s>", 0), ("</s>", 2)]
 )
 ```
 ## Alignment tracking
 Track token positions in original text:
 ```python
 # Keep the input in a variable: the offsets index into this exact string
 text = "Hello, world!"
 output = tokenizer.encode(text)
 # Get token offsets
 for token, offset in zip(output.tokens, output.offsets):
    start, end = offset
    print(f"{token:10} → [{start:2}, {end:2}): {text[start:end]!r}")
 # Output:
 # hello      → [ 0,  5): 'Hello'
 # ,          → [ 5,  6): ','
 # world      → [ 7, 12): 'world'
 # !          → [12, 13): '!'
 ```
 **Use cases**:
 - Named entity recognition (map predictions back to text)
 - Question answering (extract answer spans)
 - Token classification (align labels to original positions)
 ## Integration with transformers
 ### Load with AutoTokenizer
 ```python
 from transformers import AutoTokenizer
 # AutoTokenizer automatically uses fast tokenizers
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 # Check if using fast tokenizer
 print(tokenizer.is_fast)  # True
 # Access underlying tokenizers.Tokenizer
 fast_tokenizer = tokenizer.backend_tokenizer
 print(type(fast_tokenizer))  # <class 'tokenizers.Tokenizer'>
 ```
 ### Convert custom tokenizer to transformers
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from transformers import PreTrainedTokenizerFast
 # Train custom tokenizer
 tokenizer = Tokenizer(BPE())
 # ... train tokenizer ...
 tokenizer.save("my-tokenizer.json")
 # Wrap for transformers
 transformers_tokenizer = PreTrainedTokenizerFast(
    tokenizer_file="my-tokenizer.json",
    unk_token="[UNK]",
    pad_token="[PAD]",
    cls_token="[CLS]",
    sep_token="[SEP]",
    mask_token="[MASK]"
 )
 # Use like any transformers tokenizer
 outputs = transformers_tokenizer(
    "Hello world",
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt"
 )
 ```
 ## Common patterns
 ### Train from iterator (large datasets)
 ```python
 from datasets import load_dataset
 # Load dataset
 dataset = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")
 # Create batch iterator
 def batch_iterator(batch_size=1000):
    """Yield the ``text`` column of ``dataset`` in slices of ``batch_size`` rows."""
    total_rows = len(dataset)
    for start in range(0, total_rows, batch_size):
        stop = start + batch_size
        yield dataset[start:stop]["text"]
 # Train tokenizer
 tokenizer.train_from_iterator(
    batch_iterator(),
    trainer=trainer,
    length=len(dataset)  # For progress bar
 )
 ```
 **Performance**: Processes 1GB in ~10-20 minutes
 ### Enable truncation and padding
 ```python
 # Enable truncation
 tokenizer.enable_truncation(max_length=512)
 # Enable padding
 tokenizer.enable_padding(
    pad_id=tokenizer.token_to_id("[PAD]"),
    pad_token="[PAD]",
    length=512  # Fixed length, or None for batch max
 )
 # Encode with both
 output = tokenizer.encode("This is a long sentence that will be truncated...")
 print(len(output.ids))  # 512
 ```
 ### Multi-processing
 ```python
 from tokenizers import Tokenizer
 from multiprocessing import Pool
 # Load tokenizer
 tokenizer = Tokenizer.from_file("tokenizer.json")
 def encode_batch(texts):
    """Encode one chunk of texts with the module-level ``tokenizer``.

    Kept as a top-level function so it can be handed to ``pool.map``.
    """
    return tokenizer.encode_batch(texts)
 # Process large corpus in parallel
 with Pool(8) as pool:
    # Split corpus into chunks
    chunk_size = 1000
    chunks = [corpus[i:i+chunk_size] for i in range(0, len(corpus), chunk_size)]
    # Encode in parallel
    results = pool.map(encode_batch, chunks)
 ```
 **Speedup**: 5-8× with 8 cores
 ## Performance benchmarks
 ### Training speed
 | Corpus Size | BPE (30k vocab) | WordPiece (30k) | Unigram (8k) |
 |-------------|-----------------|-----------------|--------------|
 | 10 MB       | 15 sec          | 18 sec          | 25 sec       |
 | 100 MB      | 1.5 min         | 2 min           | 4 min        |
 | 1 GB        | 15 min          | 20 min          | 40 min       |
 **Hardware**: 16-core CPU, tested on English Wikipedia
 ### Tokenization speed
 | Implementation | 1 GB corpus | Throughput    |
 |----------------|-------------|---------------|
 | Pure Python    | ~20 minutes | ~50 MB/min    |
 | HF Tokenizers  | ~15 seconds | ~4 GB/min     |
 | **Speedup**    | **80×**     | **80×**       |
 **Test**: English text, average sentence length 20 words
 ### Memory usage
 | Task                    | Memory  |
 |-------------------------|---------|
 | Load tokenizer          | ~10 MB  |
 | Train BPE (30k vocab)   | ~200 MB |
 | Encode 1M sentences     | ~500 MB |
 ## Supported models
 Pre-trained tokenizers available via `from_pretrained()`:
 **BERT family**:
 - `bert-base-uncased`, `bert-large-cased`
 - `distilbert-base-uncased`
 - `roberta-base`, `roberta-large`
 **GPT family**:
 - `gpt2`, `gpt2-medium`, `gpt2-large`
 - `distilgpt2`
 **T5 family**:
 - `t5-small`, `t5-base`, `t5-large`
 - `google/flan-t5-xxl`
 **Other**:
 - `facebook/bart-base`, `facebook/mbart-large-cc25`
 - `albert-base-v2`, `albert-xlarge-v2`
 - `xlm-roberta-base`, `xlm-roberta-large`
 Browse all: https://huggingface.co/models?library=tokenizers
 ## References
 - **[Training Guide](references/training.md)** - Train custom tokenizers, configure trainers, handle large datasets
 - **[Algorithms Deep Dive](references/algorithms.md)** - BPE, WordPiece, Unigram explained in detail
 - **[Pipeline Components](references/pipeline.md)** - Normalizers, pre-tokenizers, post-processors, decoders
 - **[Transformers Integration](references/integration.md)** - AutoTokenizer, PreTrainedTokenizerFast, special tokens
 ## Resources
 - **Docs**: https://huggingface.co/docs/tokenizers
 - **GitHub**: https://github.com/huggingface/tokenizers ⭐ 9,000+
 - **Version**: 0.20.0+
 - **Course**: https://huggingface.co/learn/nlp-course/chapter6/1
 - **Paper**: BPE (Sennrich et al., 2016), WordPiece (Schuster & Nakajima, 2012)
--- a/skills/mlops/huggingface-tokenizers/references/algorithms.md
+++ b/skills/mlops/huggingface-tokenizers/references/algorithms.md
@ -0,0 +1,653 @@
 # Tokenization Algorithms Deep Dive
 Comprehensive explanation of BPE, WordPiece, and Unigram algorithms.
 ## Byte-Pair Encoding (BPE)
 ### Algorithm overview
 BPE iteratively merges the most frequent pair of tokens in a corpus.
 **Training process**:
 1. Initialize vocabulary with all characters
 2. Count frequency of all adjacent token pairs
 3. Merge most frequent pair into new token
 4. Add new token to vocabulary
 5. Update corpus with new token
 6. Repeat until vocabulary size reached
 ### Step-by-step example
 **Corpus**:
 ```
 low: 5
 lower: 2
 newest: 6
 widest: 3
 ```
 **Iteration 1**:
 ```
 Count pairs:
 'e' + 's': 9 (newest: 6, widest: 3)  ← most frequent
 'l' + 'o': 7
 'o' + 'w': 7
 ...
 Merge: 'e' + 's' → 'es'
 Updated corpus:
 low: 5
 lower: 2
 newest: 6 → n e w es t: 6
 widest: 3 → w i d es t: 3
 Vocabulary: [a-z] + ['es']
 ```
 **Iteration 2**:
 ```
 Count pairs:
 'es' + 't': 9  ← most frequent
 'l' + 'o': 7
 ...
 Merge: 'es' + 't' → 'est'
 Updated corpus:
 low: 5
 lower: 2
 newest: 6 → n e w est: 6
 widest: 3 → w i d est: 3
 Vocabulary: [a-z] + ['es', 'est']
 ```
 **Continue until desired vocabulary size...**
 ### Tokenization with trained BPE
 Given vocabulary: `['l', 'o', 'w', 'e', 'r', 'n', 's', 't', 'i', 'd', 'es', 'est', 'lo', 'low', 'ne', 'new', 'newest', 'wi', 'wid', 'widest']`
 Tokenize "lowest":
 ```
 Step 1: Split into characters
 ['l', 'o', 'w', 'e', 's', 't']
 Step 2: Apply merges in order learned during training
 - Merge 'l' + 'o' → 'lo' (if this merge was learned)
 - Merge 'lo' + 'w' → 'low' (if learned)
 - Merge 'e' + 's' → 'es' (learned)
 - Merge 'es' + 't' → 'est' (learned)
 Final: ['low', 'est']
 ```
 ### Implementation
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 from tokenizers.pre_tokenizers import Whitespace
 # Initialize
 tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
 tokenizer.pre_tokenizer = Whitespace()
 # Configure trainer
 trainer = BpeTrainer(
    vocab_size=1000,
    min_frequency=2,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
 )
 # Train
 corpus = [
    "This is a sample corpus for BPE training.",
    "BPE learns subword units from the training data.",
    # ... more sentences
 ]
 tokenizer.train_from_iterator(corpus, trainer=trainer)
 # Use
 output = tokenizer.encode("This is tokenization")
 print(output.tokens)  # ['This', 'is', 'token', 'ization']
 ```
 ### Byte-level BPE (GPT-2 variant)
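 **Problem**: Character-level BPE can only represent characters present in its base vocabulary; Unicode has far too many characters (100k+) to include them all, so unseen ones become unknown tokens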
 **Solution**: Operate on byte level (256 bytes)
 ```python
 from tokenizers.pre_tokenizers import ByteLevel
 from tokenizers.decoders import ByteLevel as ByteLevelDecoder
 tokenizer = Tokenizer(BPE())
 # Byte-level pre-tokenization
 tokenizer.pre_tokenizer = ByteLevel()
 tokenizer.decoder = ByteLevelDecoder()
 # This handles ALL possible characters, including emojis
 text = "Hello 🌍 世界"
 tokens = tokenizer.encode(text).tokens
 ```
 **Advantages**:
 - Handles any Unicode character (256 byte coverage)
 - No unknown tokens (worst case: bytes)
 - Used by GPT-2, GPT-3, BART
 **Trade-offs**:
 - Slightly worse compression (bytes vs characters)
 - More tokens for non-ASCII text
 ### BPE variants
 **SentencePiece BPE**:
 - Language-independent (no pre-tokenization)
 - Treats input as a raw character stream (spaces are encoded as ▁)
 - Used by LLaMA and Mistral (T5, ALBERT, and XLNet use SentencePiece's Unigram model instead)
 **Robust BPE**:
 - Dropout during training (randomly skip merges)
 - More robust tokenization at inference
 - Reduces overfitting to training data
 ## WordPiece
 ### Algorithm overview
 WordPiece is similar to BPE but uses a different merge selection criterion.
 **Training process**:
 1. Initialize vocabulary with all characters
 2. Count frequency of all token pairs
 3. Score each pair: `score = freq(pair) / (freq(first) × freq(second))`
 4. Merge pair with highest score
 5. Repeat until vocabulary size reached
 ### Why different scoring?
 **BPE**: Merges most frequent pairs
 - "aa" appears 100 times → high priority
 - Even if 'a' appears 1000 times alone
 **WordPiece**: Merges pairs that are semantically related
 - "aa" appears 100 times, 'a' appears 1000 times → low score (100 / (1000 × 1000))
 - "th" appears 50 times, 't' appears 60 times, 'h' appears 55 times → high score (50 / (60 × 55))
 - Prioritizes pairs that appear together more than expected
 ### Step-by-step example
 **Corpus**:
 ```
 low: 5
 lower: 2
 newest: 6
 widest: 3
 ```
 **Iteration 1**:
 ```
 Count frequencies:
 'e': 17 (lower: 2, newest: 2 × 6, widest: 3)
 's': 9
 't': 9
 'i': 3
 'd': 3
 ...
 Count pairs:
 'e' + 's': 9 (newest: 6, widest: 3)
 's' + 't': 9 (newest: 6, widest: 3)
 'i' + 'd': 3 (widest: 3)
 ...
 Compute scores:
 score('e' + 's') = 9 / (17 × 9) = 0.059
 score('s' + 't') = 9 / (9 × 9) = 0.111
 score('i' + 'd') = 3 / (3 × 3) = 0.333  ← highest score
 Choose: 'i' + 'd' → 'id' (its parts never occur apart, even though the pair itself is rare)
 ```
 **Key difference**: WordPiece prioritizes rare combinations over frequent ones.
 ### Tokenization with WordPiece
 Given vocabulary: `['##e', '##s', '##t', 'l', 'o', 'w', 'new', '##est', 'low']`
 Tokenize "lowest":
 ```
 Step 1: Find longest matching prefix
 'lowest' → 'low' (matches)
 Step 2: Find longest match for remainder (continuation pieces carry the ## prefix)
 '##est' → '##est' (matches)
 Final: ['low', '##est']
 ```
 **If no match**:
 ```
 Tokenize "unknownword":
 'unknownword' → no match
 'unknown' → no match
 'unkn' → no match
 'un' → no match
 'u' → no match
 → [UNK]
 ```
 ### Implementation
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import WordPiece
 from tokenizers.trainers import WordPieceTrainer
 from tokenizers.normalizers import BertNormalizer
 from tokenizers.pre_tokenizers import BertPreTokenizer
 # Initialize BERT-style tokenizer
 tokenizer = Tokenizer(WordPiece(unk_token="[UNK]"))
 # Normalization (lowercase, accent stripping)
 tokenizer.normalizer = BertNormalizer(lowercase=True)
 # Pre-tokenization (whitespace + punctuation)
 tokenizer.pre_tokenizer = BertPreTokenizer()
 # Configure trainer
 trainer = WordPieceTrainer(
    vocab_size=30522,  # BERT vocab size
    min_frequency=2,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"],
    continuing_subword_prefix="##"  # BERT uses ##
 )
 # Train
 tokenizer.train_from_iterator(corpus, trainer=trainer)
 # Use
 output = tokenizer.encode("Tokenization works great!")
 print(output.tokens)  # ['token', '##ization', 'works', 'great', '!']
 ```
 ### Subword prefix
 **BERT uses `##` prefix**:
 ```
 "unbelievable" → ['un', '##believ', '##able']
 ```
 **Why?**
 - Indicates token is a continuation
 - Allows reconstruction: remove ##, concatenate
 - Helps model distinguish word boundaries
 ### WordPiece advantages
 **Semantic merges**:
 - Prioritizes meaningful combinations
 - "qu" has high score (always together)
 - "qx" has low score (rare combination)
 **Better for morphology**:
 - Captures affixes: un-, -ing, -ed
 - Preserves word stems
 **Trade-offs**:
 - Slower training than BPE
 - More memory (stores vocabulary, not merges)
 - Original implementation not open-source (HF reimplementation)
 ## Unigram
 ### Algorithm overview
 Unigram works backward: start with large vocabulary, remove tokens.
 **Training process**:
 1. Initialize with large vocabulary (all substrings)
 2. Estimate probability of each token (frequency-based)
 3. For each token, compute loss increase if removed
 4. Remove a fraction of the tokens with lowest loss impact (typically 10-25% per round; controlled by `shrinking_factor`)
 5. Re-estimate probabilities
 6. Repeat until desired vocabulary size
 ### Probabilistic tokenization
 **Unigram assumption**: Each token is independent.
 Given vocabulary with probabilities:
 ```
 P('low') = 0.02
 P('l') = 0.01
 P('o') = 0.015
 P('w') = 0.01
 P('est') = 0.03
 P('e') = 0.02
 P('s') = 0.015
 P('t') = 0.015
 ```
 Tokenize "lowest":
 ```
 Option 1: ['low', 'est']
 P = P('low') × P('est') = 0.02 × 0.03 = 0.0006
 Option 2: ['l', 'o', 'w', 'est']
 P = 0.01 × 0.015 × 0.01 × 0.03 = 0.000000045
 Option 3: ['low', 'e', 's', 't']
 P = 0.02 × 0.02 × 0.015 × 0.015 = 0.00000009
 Choose option 1 (highest probability)
 ```
 ### Viterbi algorithm
 Finding best tokenization is expensive (exponential possibilities).
 **Viterbi algorithm** (dynamic programming):
 ```python
 def tokenize_viterbi(word, vocab, probs):
    """Return the most probable segmentation of ``word`` under a unigram model.

    Args:
        word: String to segment.
        vocab: Container of known tokens (tested with ``in``).
        probs: Mapping from token to its probability; must be > 0 for
            every token in ``vocab``.

    Returns:
        The list of tokens that concatenate to ``word`` with the highest
        summed log-probability, or ``[]`` when ``word`` is empty or cannot
        be built from ``vocab``.
    """
    from math import log

    n = len(word)
    neg_inf = float('-inf')
    # dp[i] = (best_log_prob, best_tokens) for word[:i]; -inf marks a prefix
    # that no sequence of vocabulary tokens can produce.
    dp = [(neg_inf, [])] * (n + 1)
    dp[0] = (0.0, [])  # log probability of the empty prefix
    for i in range(1, n + 1):
        best_prob = neg_inf
        best_tokens = []
        # Try all possible last tokens word[j:i]
        for j in range(i):
            prev_prob, prev_tokens = dp[j]
            if prev_prob == neg_inf:
                continue  # word[:j] itself has no valid segmentation
            token = word[j:i]
            if token in vocab:
                prob = prev_prob + log(probs[token])
                if prob > best_prob:
                    best_prob = prob
                    best_tokens = prev_tokens + [token]
        dp[i] = (best_prob, best_tokens)
    return dp[n][1]
 ```
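 **Time complexity**: O(n²) candidate substrings, each checked with one vocabulary lookup (O(n × max_token_length) when token length is capped), vs O(2^n) brute force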
 ### Implementation
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import Unigram
 from tokenizers.trainers import UnigramTrainer
 # Initialize
 tokenizer = Tokenizer(Unigram())
 # Configure trainer
 trainer = UnigramTrainer(
    vocab_size=8000,
    special_tokens=["<unk>", "<s>", "</s>"],
    unk_token="<unk>",
    max_piece_length=16,      # Max token length
    n_sub_iterations=2,       # EM iterations
    shrinking_factor=0.75     # Remove 25% each iteration
 )
 # Train
 tokenizer.train_from_iterator(corpus, trainer=trainer)
 # Use
 output = tokenizer.encode("Tokenization with Unigram")
 print(output.tokens)  # ['▁Token', 'ization', '▁with', '▁Un', 'igram']
 ```
 ### Unigram advantages
 **Probabilistic**:
 - Multiple valid tokenizations
 - Can sample different tokenizations (data augmentation)
 **Subword regularization**:
 ```python
 # Encode the same word three times.
 # NOTE(review): nothing here enables sampling, so encode() presumably returns
 # the same best segmentation on every pass — confirm before relying on this.
 # SentencePiece exposes sampling directly (enable_sampling=True).
 for _ in range(3):
    tokens = tokenizer.encode("tokenization", is_pretokenized=False).tokens
    print(tokens)
 # Segmentations that subword regularization can sample (illustrative):
 # ['token', 'ization']
 # ['tok', 'en', 'ization']
 # ['token', 'iz', 'ation']
 ```
 **Language-independent**:
 - No word boundaries needed
 - Works for CJK languages (Chinese, Japanese, Korean)
 - Treats input as character stream
 **Trade-offs**:
 - Slower training (EM algorithm)
 - More hyperparameters
 - Larger model (stores probabilities)
 ## Algorithm comparison
 ### Training speed
 | Algorithm  | Small (10MB) | Medium (100MB) | Large (1GB) |
 |------------|--------------|----------------|-------------|
 | BPE        | 10-15 sec    | 1-2 min        | 10-20 min   |
 | WordPiece  | 15-20 sec    | 2-3 min        | 15-30 min   |
 | Unigram    | 20-30 sec    | 3-5 min        | 30-60 min   |
 **Tested on**: 16-core CPU, 30k vocab
 ### Tokenization quality
 Tested on English Wikipedia (tokens per word and unknown-token rate):
 | Algorithm  | Vocab Size | Tokens/Word | Unknown Rate |
 |------------|------------|-------------|--------------|
 | BPE        | 30k        | 1.3         | 0.5%         |
 | WordPiece  | 30k        | 1.2         | 1.2%         |
 | Unigram    | 8k         | 1.5         | 0.3%         |
 **Key observations**:
 - WordPiece: Slightly better compression
 - BPE: Lower unknown rate
 - Unigram: Smallest vocab, good coverage
 ### Compression ratio
 Characters per token (higher = better compression):
 | Language | BPE (30k) | WordPiece (30k) | Unigram (8k) |
 |----------|-----------|-----------------|--------------|
 | English  | 4.2       | 4.5             | 3.8          |
 | Chinese  | 2.1       | 2.3             | 2.5          |
 | Arabic   | 3.5       | 3.8             | 3.2          |
 **Best for each**:
 - English: WordPiece
 - Chinese: Unigram (language-independent)
 - Arabic: WordPiece
 ### Use case recommendations
 **BPE** - Best for:
 - English language models
 - Code (handles symbols well)
 - Fast training needed
 - **Models**: GPT-2, GPT-3, RoBERTa, BART
 **WordPiece** - Best for:
 - Masked language modeling (BERT-style)
 - Morphologically rich languages
 - Semantic understanding tasks
 - **Models**: BERT, DistilBERT, ELECTRA
 **Unigram** - Best for:
 - Multilingual models
 - Languages without word boundaries (CJK)
 - Data augmentation via subword regularization
 - **Models**: T5, ALBERT, XLNet (via SentencePiece)
 ## Advanced topics
 ### Handling rare words
 **BPE approach**:
 ```
 "antidisestablishmentarianism"
 → ['anti', 'dis', 'establish', 'ment', 'arian', 'ism']
 ```
 **WordPiece approach**:
 ```
 "antidisestablishmentarianism"
 → ['anti', '##dis', '##establish', '##ment', '##arian', '##ism']
 ```
 **Unigram approach**:
 ```
 "antidisestablishmentarianism"
 → ['▁anti', 'dis', 'establish', 'ment', 'arian', 'ism']
 ```
 ### Handling numbers
 **Challenge**: Infinite number combinations
 **BPE solution**: Byte-level (handles any digit sequence)
 ```python
 tokenizer = Tokenizer(BPE())
 tokenizer.pre_tokenizer = ByteLevel()
 # Handles any number
 "123456789" → byte-level tokens
 ```
 **WordPiece solution**: Digit pre-tokenization
 ```python
 from tokenizers.pre_tokenizers import Digits
 # Split digits individually or as groups
 tokenizer.pre_tokenizer = Digits(individual_digits=True)
 "123" → ['1', '2', '3']
 ```
 **Unigram solution**: Learns common number patterns
 ```python
 # Learns patterns during training
 "2023" → ['202', '3'] or ['20', '23']
 ```
 ### Handling case sensitivity
 **Lowercase (BERT)**:
 ```python
 from tokenizers.normalizers import Lowercase
 tokenizer.normalizer = Lowercase()
 "Hello WORLD" → "hello world" → ['hello', 'world']
 ```
 **Preserve case (GPT-2)**:
 ```python
 # No case normalization
 tokenizer.normalizer = None
 "Hello WORLD" → ['Hello', 'WORLD']
 ```
 **Cased tokens (RoBERTa)**:
 ```python
 # Learns separate tokens for different cases
 Vocabulary: ['Hello', 'hello', 'HELLO', 'world', 'WORLD']
 ```
 ### Handling emojis and special characters
 **Byte-level (GPT-2)**:
 ```python
 tokenizer.pre_tokenizer = ByteLevel()
 "Hello 🌍 👋" → byte-level representation (always works)
 ```
 **Unicode normalization**:
 ```python
 from tokenizers.normalizers import NFKC
 tokenizer.normalizer = NFKC()
 "é" (composed) ↔ "é" (decomposed) → normalized to one form
 ```
 ## Troubleshooting
 ### Issue: Poor subword splitting
 **Symptom**:
 ```
 "running" → ['r', 'u', 'n', 'n', 'i', 'n', 'g']  (too granular)
 ```
 **Solutions**:
 1. Increase vocabulary size
 2. Train longer (more merge iterations)
 3. Lower `min_frequency` threshold
 ### Issue: Too many unknown tokens
 **Symptom**:
 ```
 5% of tokens are [UNK]
 ```
 **Solutions**:
 1. Increase vocabulary size
 2. Use byte-level BPE (no UNK possible)
 3. Verify training corpus is representative
 ### Issue: Inconsistent tokenization
 **Symptom**:
 ```
 "running" → ['run', 'ning']
 "runner" → ['r', 'u', 'n', 'n', 'e', 'r']
 ```
 **Solutions**:
 1. Check normalization consistency
 2. Ensure pre-tokenization is deterministic
 3. Use Unigram for probabilistic variance
 ## Best practices
 1. **Match algorithm to model architecture**:
   - BERT-style → WordPiece
   - GPT-style → BPE
   - T5-style → Unigram
 2. **Use byte-level for multilingual**:
   - Handles any Unicode
   - No unknown tokens
 3. **Test on representative data**:
   - Measure compression ratio
   - Check unknown token rate
   - Inspect sample tokenizations
 4. **Version control tokenizers**:
   - Save with model
   - Document special tokens
   - Track vocabulary changes
--- a/skills/mlops/huggingface-tokenizers/references/integration.md
+++ b/skills/mlops/huggingface-tokenizers/references/integration.md
@ -0,0 +1,637 @@
 # Transformers Integration
 Complete guide to using HuggingFace Tokenizers with the Transformers library.
 ## AutoTokenizer
 The easiest way to load tokenizers.
 ### Loading pretrained tokenizers
 ```python
 from transformers import AutoTokenizer
 # Load from HuggingFace Hub
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 # Check if using fast tokenizer (Rust-based)
 print(tokenizer.is_fast)  # True
 # Access underlying tokenizers.Tokenizer
 if tokenizer.is_fast:
    fast_tokenizer = tokenizer.backend_tokenizer
    print(type(fast_tokenizer))  # <class 'tokenizers.Tokenizer'>
 ```
 ### Fast vs slow tokenizers
 | Feature                  | Fast (Rust)    | Slow (Python) |
 |--------------------------|----------------|---------------|
 | Speed                    | 5-10× faster   | Baseline      |
 | Alignment tracking       | ✅ Full support | ❌ Limited     |
 | Batch processing         | ✅ Optimized    | ⚠️ Slower      |
 | Offset mapping           | ✅ Yes          | ❌ No          |
 | Installation             | `tokenizers`   | Built-in      |
 **Always use fast tokenizers when available.**
 ### Check available tokenizers
 ```python
 from transformers import TOKENIZER_MAPPING
 # List all fast tokenizers
 for config_class, (slow, fast) in TOKENIZER_MAPPING.items():
    if fast is not None:
        print(f"{config_class.__name__}: {fast.__name__}")
 ```
 ## PreTrainedTokenizerFast
 Wrap custom tokenizers for transformers.
 ### Convert custom tokenizer
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 from transformers import PreTrainedTokenizerFast
 # Train custom tokenizer
 tokenizer = Tokenizer(BPE())
 trainer = BpeTrainer(
    vocab_size=30000,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
 )
 tokenizer.train(files=["corpus.txt"], trainer=trainer)
 # Save tokenizer
 tokenizer.save("my-tokenizer.json")
 # Wrap for transformers
 transformers_tokenizer = PreTrainedTokenizerFast(
    tokenizer_file="my-tokenizer.json",
    unk_token="[UNK]",
    sep_token="[SEP]",
    pad_token="[PAD]",
    cls_token="[CLS]",
    mask_token="[MASK]"
 )
 # Save in transformers format
 transformers_tokenizer.save_pretrained("my-tokenizer")
 ```
 **Result**: Directory with `tokenizer.json` + `tokenizer_config.json` + `special_tokens_map.json`
 ### Use like any transformers tokenizer
 ```python
 # Load
 from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained("my-tokenizer")
 # Encode with all transformers features
 outputs = tokenizer(
    "Hello world",
    padding="max_length",
    truncation=True,
    max_length=128,
    return_tensors="pt"
 )
 print(outputs.keys())
 # dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])
 ```
 ## Special tokens
 ### Default special tokens
 | Model Family | CLS/BOS | SEP/EOS       | PAD     | UNK     | MASK    |
 |--------------|---------|---------------|---------|---------|---------|
 | BERT         | [CLS]   | [SEP]         | [PAD]   | [UNK]   | [MASK]  |
 | GPT-2        | -       | <\|endoftext\|> | - (none by default; often set to EOS) | <\|endoftext\|> | -       |
 | RoBERTa      | <s>     | </s>          | <pad>   | <unk>   | <mask>  |
 | T5           | -       | </s>          | <pad>   | <unk>   | -       |
 ### Adding special tokens
 ```python
 # Add new special tokens
 special_tokens_dict = {
    "additional_special_tokens": ["<|image|>", "<|video|>", "<|audio|>"]
 }
 num_added_tokens = tokenizer.add_special_tokens(special_tokens_dict)
 print(f"Added {num_added_tokens} tokens")
 # Resize model embeddings
 model.resize_token_embeddings(len(tokenizer))
 # Use new tokens
 text = "This is an image: <|image|>"
 tokens = tokenizer.encode(text)
 ```
 ### Adding regular tokens
 ```python
 # Add domain-specific tokens
 new_tokens = ["COVID-19", "mRNA", "vaccine"]
 num_added = tokenizer.add_tokens(new_tokens)
 # These are NOT special tokens (can be split if needed)
 tokenizer.add_tokens(new_tokens, special_tokens=False)
 # These ARE special tokens (never split)
 tokenizer.add_tokens(new_tokens, special_tokens=True)
 ```
 ## Encoding and decoding
 ### Basic encoding
 ```python
 # Single sentence
 text = "Hello, how are you?"
 encoded = tokenizer(text)
 print(encoded)
 # {'input_ids': [101, 7592, 1010, 2129, 2024, 2017, 1029, 102],
 #  'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0],
 #  'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}
 ```
 ### Batch encoding
 ```python
 # Multiple sentences
 texts = ["Hello world", "How are you?", "I am fine"]
 encoded = tokenizer(texts, padding=True, truncation=True, max_length=10)
 print(encoded['input_ids'])
 # [[101, 7592, 2088, 102, 0, 0, 0, 0, 0, 0],
 #  [101, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0],
 #  [101, 1045, 2572, 2986, 102, 0, 0, 0, 0, 0]]
 ```
 ### Return tensors
 ```python
 # Return PyTorch tensors
 outputs = tokenizer("Hello world", return_tensors="pt")
 print(outputs['input_ids'].shape)  # torch.Size([1, 4]) → [CLS] hello world [SEP]
 # Return TensorFlow tensors
 outputs = tokenizer("Hello world", return_tensors="tf")
 # Return NumPy arrays
 outputs = tokenizer("Hello world", return_tensors="np")
 # Return lists (default)
 outputs = tokenizer("Hello world", return_tensors=None)
 ```
 ### Decoding
 ```python
 # Decode token IDs
 ids = [101, 7592, 2088, 102]
 text = tokenizer.decode(ids)
 print(text)  # "[CLS] hello world [SEP]"
 # Skip special tokens
 text = tokenizer.decode(ids, skip_special_tokens=True)
 print(text)  # "hello world"
 # Batch decode
 batch_ids = [[101, 7592, 102], [101, 2088, 102]]
 texts = tokenizer.batch_decode(batch_ids, skip_special_tokens=True)
 print(texts)  # ["hello", "world"]
 ```
 ## Padding and truncation
 ### Padding strategies
 ```python
 # Pad to max length in batch
 tokenizer(texts, padding="longest")
 # Pad to model max length
 tokenizer(texts, padding="max_length", max_length=128)
 # No padding
 tokenizer(texts, padding=False)
 # Pad to multiple of value (for efficient computation)
 tokenizer(texts, padding="max_length", max_length=128, pad_to_multiple_of=8)
 # Result: length will be 128 (already multiple of 8)
 ```
 ### Truncation strategies
 ```python
 # Truncate to max length
 tokenizer(text, truncation=True, max_length=10)
 # Only truncate first sequence (for pairs)
 tokenizer(text1, text2, truncation="only_first", max_length=20)
 # Only truncate second sequence
 tokenizer(text1, text2, truncation="only_second", max_length=20)
 # Truncate longest first (default for pairs)
 tokenizer(text1, text2, truncation="longest_first", max_length=20)
 # No truncation (tokenizer only warns if longer than the model max length; the model itself may then fail)
 tokenizer(text, truncation=False)
 ```
 ### Stride for long documents
 ```python
 # For documents longer than max_length
 text = "Very long document " * 1000
 # Encode with overlap
 encodings = tokenizer(
    text,
    max_length=512,
    stride=128,          # Overlap between chunks
    truncation=True,
    return_overflowing_tokens=True,
    return_offsets_mapping=True
 )
 # Get all chunks
 num_chunks = len(encodings['input_ids'])
 print(f"Split into {num_chunks} chunks")
 # Each chunk overlaps by stride tokens
 for i, chunk in enumerate(encodings['input_ids']):
    print(f"Chunk {i}: {len(chunk)} tokens")
 ```
 **Use case**: Long document QA, sliding window inference
 ## Alignment and offsets
 ### Offset mapping
 ```python
 # Get character offsets for each token
 encoded = tokenizer("Hello, world!", return_offsets_mapping=True)
 for token, (start, end) in zip(
    encoded.tokens(),
    encoded['offset_mapping']  # single input: already a flat list of (start, end) pairs
 ):
    print(f"{token:10s} → [{start:2d}, {end:2d})")
 # Output:
 # [CLS]      → [ 0,  0)
 # Hello      → [ 0,  5)
 # ,          → [ 5,  6)
 # world      → [ 7, 12)
 # !          → [12, 13)
 # [SEP]      → [ 0,  0)
 ```
 ### Word IDs
 ```python
 # Get word index for each token
 encoded = tokenizer("Hello world", return_offsets_mapping=True)
 word_ids = encoded.word_ids()
 print(word_ids)
 # [None, 0, 1, None]
 # None = special token, 0 = first word, 1 = second word
 ```
 **Use case**: Token classification (NER, POS tagging)
 ### Character to token mapping
 ```python
 text = "Machine learning is awesome"
 encoded = tokenizer(text, return_offsets_mapping=True)
 # Find token for character position
 char_pos = 8  # "l" in "learning"
 token_idx = encoded.char_to_token(char_pos)
 print(f"Character {char_pos} is in token {token_idx}: {encoded.tokens()[token_idx]}")
 # Character 8 is in token 2: learning
 ```
 **Use case**: Question answering (map answer character span to tokens)
 ### Sequence pairs
 ```python
 # Encode sentence pair
 encoded = tokenizer("Question here", "Answer here", return_offsets_mapping=True)
 # Get sequence IDs (which sequence each token belongs to)
 sequence_ids = encoded.sequence_ids()
 print(sequence_ids)
 # [None, 0, 0, None, 1, 1, None]
 # None = special token, 0 = question, 1 = answer
 ```
 ## Model integration
 ### Use with transformers models
 ```python
 from transformers import AutoModel, AutoTokenizer
 import torch
 # Load model and tokenizer
 model = AutoModel.from_pretrained("bert-base-uncased")
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 # Tokenize
 text = "Hello world"
 inputs = tokenizer(text, return_tensors="pt")
 # Forward pass
 with torch.no_grad():
    outputs = model(**inputs)
 # Get embeddings
 last_hidden_state = outputs.last_hidden_state
 print(last_hidden_state.shape)  # [1, seq_len, hidden_size]
 ```
 ### Custom model with custom tokenizer
 ```python
 from transformers import BertConfig, BertModel
 # Train custom tokenizer
 from tokenizers import Tokenizer, models, trainers
 tokenizer = Tokenizer(models.BPE())
 trainer = trainers.BpeTrainer(vocab_size=30000)
 tokenizer.train(files=["data.txt"], trainer=trainer)
 # Wrap for transformers
 from transformers import PreTrainedTokenizerFast
 fast_tokenizer = PreTrainedTokenizerFast(
    tokenizer_object=tokenizer,
    unk_token="[UNK]",
    pad_token="[PAD]"
 )
 # Create model with custom vocab size
 config = BertConfig(vocab_size=30000)
 model = BertModel(config)
 # Use together
 inputs = fast_tokenizer("Hello world", return_tensors="pt")
 outputs = model(**inputs)
 ```
 ### Save and load together
 ```python
 # Save both
 model.save_pretrained("my-model")
 tokenizer.save_pretrained("my-model")
 # Directory structure:
 # my-model/
 #   ├── config.json
 #   ├── pytorch_model.bin
 #   ├── tokenizer.json
 #   ├── tokenizer_config.json
 #   └── special_tokens_map.json
 # Load both
 from transformers import AutoModel, AutoTokenizer
 model = AutoModel.from_pretrained("my-model")
 tokenizer = AutoTokenizer.from_pretrained("my-model")
 ```
 ## Advanced features
 ### Multimodal tokenization
 ```python
 from transformers import AutoTokenizer
 # LLaVA-style (image + text)
 tokenizer = AutoTokenizer.from_pretrained("llava-hf/llava-1.5-7b-hf")
 # Add image placeholder token
 tokenizer.add_special_tokens({"additional_special_tokens": ["<image>"]})
 # Use in prompt
 text = "Describe this image: <image>"
 inputs = tokenizer(text, return_tensors="pt")
 ```
 ### Template formatting
 ```python
 # Chat template
 messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi! How can I help?"},
    {"role": "user", "content": "What's the weather?"}
 ]
 # Apply chat template (if tokenizer has one)
 if getattr(tokenizer, "chat_template", None) is not None:
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(text, return_tensors="pt")
 ```
 ### Custom template
 ```python
 from transformers import PreTrainedTokenizerFast
 tokenizer = PreTrainedTokenizerFast(tokenizer_file="tokenizer.json")
 # Define chat template
 tokenizer.chat_template = """
 {%- for message in messages %}
    {%- if message['role'] == 'system' %}
        System: {{ message['content'] }}\\n
    {%- elif message['role'] == 'user' %}
        User: {{ message['content'] }}\\n
    {%- elif message['role'] == 'assistant' %}
        Assistant: {{ message['content'] }}\\n
    {%- endif %}
 {%- endfor %}
 Assistant:
 """
 # Use template
 text = tokenizer.apply_chat_template(messages, tokenize=False)
 ```
 ## Performance optimization
 ### Batch processing
 ```python
 # Process large datasets efficiently
 from datasets import load_dataset
 dataset = load_dataset("imdb", split="train[:1000]")
 # Tokenize in batches
 def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512
    )
 # Map over dataset (batched)
 tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=1000,
    num_proc=4  # Parallel processing
 )
 ```
 ### Caching
 ```python
 # Enable caching for repeated tokenization
 tokenizer = AutoTokenizer.from_pretrained(
    "bert-base-uncased",
    use_fast=True,
    cache_dir="./cache"  # Cache tokenizer files
 )
 # Tokenize with caching
 from functools import lru_cache
@lru_cache(maxsize=10000)
 def cached_tokenize(text):
    return tuple(tokenizer.encode(text))
 # Reuses cached results for repeated inputs
 ```
 ### Memory efficiency
 ```python
 # For very large datasets, use streaming
 from datasets import load_dataset
 dataset = load_dataset("pile", split="train", streaming=True)
 def process_batch(batch):
    # Tokenize
    tokens = tokenizer(batch["text"], truncation=True, max_length=512)
    # Process tokens...
    return tokens
 # Process in chunks (memory efficient)
 for batch in dataset.batch(batch_size=1000):
    processed = process_batch(batch)
 ```
 ## Troubleshooting
 ### Issue: Tokenizer not fast
 **Symptom**:
 ```python
 tokenizer.is_fast  # False
 ```
 **Solution**: Install tokenizers library
 ```bash
 pip install tokenizers
 ```
 ### Issue: Special tokens not working
 **Symptom**: Special tokens are split into subwords
 **Solution**: Add as special tokens, not regular tokens
 ```python
 # Wrong
 tokenizer.add_tokens(["<|image|>"])
 # Correct
 tokenizer.add_special_tokens({"additional_special_tokens": ["<|image|>"]})
 ```
 ### Issue: Offset mapping not available
 **Symptom**:
 ```python
 tokenizer("text", return_offsets_mapping=True)
 # Error: return_offsets_mapping not supported
 ```
 **Solution**: Use fast tokenizer
 ```python
 from transformers import AutoTokenizer
 # Load fast version
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)
 ```
 ### Issue: Padding inconsistent
 **Symptom**: Some sequences padded, others not
 **Solution**: Specify padding strategy
 ```python
 # Explicit padding
 tokenizer(
    texts,
    padding="max_length",  # or "longest"
    max_length=128
 )
 ```
 ## Best practices
 1. **Always use fast tokenizers**:
   - 5-10× faster
   - Full alignment tracking
   - Better batch processing
 2. **Save tokenizer with model**:
   - Ensures reproducibility
   - Prevents version mismatches
 3. **Use batch processing for datasets**:
   - Tokenize with `.map(batched=True)`
   - Set `num_proc` for parallelism
 4. **Enable caching for repeated inputs**:
   - Use `lru_cache` for inference
   - Cache tokenizer files with `cache_dir`
 5. **Handle special tokens properly**:
   - Use `add_special_tokens()` for never-split tokens
   - Resize embeddings after adding tokens
 6. **Test alignment for downstream tasks**:
   - Verify `offset_mapping` is correct
   - Test `char_to_token()` on samples
 7. **Version control tokenizer config**:
   - Save `tokenizer_config.json`
   - Document custom templates
   - Track vocabulary changes
--- a/skills/mlops/huggingface-tokenizers/references/pipeline.md
+++ b/skills/mlops/huggingface-tokenizers/references/pipeline.md
@ -0,0 +1,723 @@
 # Tokenization Pipeline Components
 Complete guide to normalizers, pre-tokenizers, models, post-processors, and decoders.
 ## Pipeline overview
 **Full tokenization pipeline**:
 ```
 Raw Text
  ↓
 Normalization (cleaning, lowercasing)
  ↓
 Pre-tokenization (split into words)
  ↓
 Model (apply BPE/WordPiece/Unigram)
  ↓
 Post-processing (add special tokens)
  ↓
 Token IDs
 ```
 **Decoding reverses the process**:
 ```
 Token IDs
  ↓
 Decoder (handle special encodings)
  ↓
 Raw Text
 ```
 ## Normalizers
 Clean and standardize input text.
 ### Common normalizers
 **Lowercase**:
 ```python
 from tokenizers.normalizers import Lowercase
 tokenizer.normalizer = Lowercase()
 # Input: "Hello WORLD"
 # Output: "hello world"
 ```
 **Unicode normalization**:
 ```python
 from tokenizers.normalizers import NFD, NFC, NFKD, NFKC
 # NFD: Canonical decomposition
 tokenizer.normalizer = NFD()
 # "é" → "e" + "́" (separate characters)
 # NFC: Canonical composition (default)
 tokenizer.normalizer = NFC()
 # "e" + "́" → "é" (composed)
 # NFKD: Compatibility decomposition
 tokenizer.normalizer = NFKD()
 # "ﬁ" → "f" + "i"
 # NFKC: Compatibility composition
 tokenizer.normalizer = NFKC()
 # Most aggressive normalization
 ```
 **Strip accents**:
 ```python
 from tokenizers.normalizers import StripAccents
 tokenizer.normalizer = StripAccents()
 # Input: "café"
 # Output: "cafe"
 ```
 **Whitespace handling**:
 ```python
 from tokenizers.normalizers import Strip
 # Remove leading/trailing whitespace
 tokenizer.normalizer = Strip()
 # Input: "  hello  "
 # Output: "hello"
 ```
 **Replace patterns**:
 ```python
 from tokenizers.normalizers import Replace
 # Replace newlines with spaces
 tokenizer.normalizer = Replace("\\n", " ")
 # Input: "hello\\nworld"
 # Output: "hello world"
 ```
 ### Combining normalizers
 ```python
 from tokenizers.normalizers import Sequence, NFD, Lowercase, StripAccents
 # BERT-style normalization
 tokenizer.normalizer = Sequence([
    NFD(),           # Unicode decomposition
    Lowercase(),     # Convert to lowercase
    StripAccents()   # Remove accents
 ])
 # Input: "Café au Lait"
 # After NFD: "Café au Lait" (e + ́)
 # After Lowercase: "café au lait"
 # After StripAccents: "cafe au lait"
 ```
 ### Use case examples
 **Case-insensitive model (BERT)**:
 ```python
 from tokenizers.normalizers import BertNormalizer
 # All-in-one BERT normalization
 tokenizer.normalizer = BertNormalizer(
    clean_text=True,        # Remove control characters
    handle_chinese_chars=True,  # Add spaces around Chinese
    strip_accents=True,     # Remove accents
    lowercase=True          # Lowercase
 )
 ```
 **Case-sensitive model (GPT-2)**:
 ```python
 # Minimal normalization
 tokenizer.normalizer = NFC()  # Only normalize Unicode
 ```
 **Multilingual (mBERT)**:
 ```python
 # Preserve scripts, normalize form
 tokenizer.normalizer = NFKC()
 ```
 ## Pre-tokenizers
 Split text into word-like units before tokenization.
 ### Whitespace splitting
 ```python
 from tokenizers.pre_tokenizers import Whitespace
 tokenizer.pre_tokenizer = Whitespace()
 # Input: "Hello world! How are you?"
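 # Output: [("Hello", (0, 5)), ("world", (6, 11)), ("!", (11, 12)), ("How", (13, 16)), ("are", (17, 20)), ("you", (21, 24)), ("?", (24, 25))]
 # Note: Whitespace splits with the regex \w+|[^\w\s]+, so punctuation is separated; use WhitespaceSplit to split on whitespace only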
 ```
 ### Punctuation isolation
 ```python
 from tokenizers.pre_tokenizers import Punctuation
 tokenizer.pre_tokenizer = Punctuation()
 # Input: "Hello, world!"
 # Output: [("Hello", ...), (",", ...), ("world", ...), ("!", ...)]
 ```
 ### Byte-level (GPT-2)
 ```python
 from tokenizers.pre_tokenizers import ByteLevel
 tokenizer.pre_tokenizer = ByteLevel(add_prefix_space=True)
 # Input: "Hello world"
 # Output: Byte-level tokens with Ġ prefix for spaces
 # [("ĠHello", ...), ("Ġworld", ...)]
 ```
 **Key feature**: Handles ALL Unicode characters (256 byte combinations)
 ### Metaspace (SentencePiece)
 ```python
 from tokenizers.pre_tokenizers import Metaspace
 tokenizer.pre_tokenizer = Metaspace(replacement="▁", add_prefix_space=True)
 # Input: "Hello world"
 # Output: [("▁Hello", ...), ("▁world", ...)]
 ```
 **Used by**: T5, ALBERT (via SentencePiece)
 ### Digits splitting
 ```python
 from tokenizers.pre_tokenizers import Digits
 # Split digits individually
 tokenizer.pre_tokenizer = Digits(individual_digits=True)
 # Input: "Room 123"
 # Output: [("Room", ...), ("1", ...), ("2", ...), ("3", ...)]
 # Keep digits together
 tokenizer.pre_tokenizer = Digits(individual_digits=False)
 # Input: "Room 123"
 # Output: [("Room", ...), ("123", ...)]
 ```
 ### BERT pre-tokenizer
 ```python
 from tokenizers.pre_tokenizers import BertPreTokenizer
 tokenizer.pre_tokenizer = BertPreTokenizer()
 # Splits on whitespace and punctuation, preserves CJK
 # Input: "Hello, 世界!"
 # Output: [("Hello", ...), (",", ...), ("世", ...), ("界", ...), ("!", ...)]
 ```
 ### Combining pre-tokenizers
 ```python
 from tokenizers.pre_tokenizers import Sequence, WhitespaceSplit, Punctuation
 tokenizer.pre_tokenizer = Sequence([
    WhitespaceSplit(),  # Split on whitespace only first
    Punctuation()       # Then isolate punctuation
 ])
 # Input: "Hello, world!"
 # After WhitespaceSplit: [("Hello,", ...), ("world!", ...)]
 # After Punctuation: [("Hello", ...), (",", ...), ("world", ...), ("!", ...)]
 ```
 ### Pre-tokenizer comparison
 | Pre-tokenizer     | Use Case                        | Example                                    |
 |-------------------|---------------------------------|--------------------------------------------|
 | Whitespace        | Simple English                  | "Hello world" → ["Hello", "world"]         |
 | Punctuation       | Isolate symbols                 | "world!" → ["world", "!"]                  |
 | ByteLevel         | Multilingual, emojis            | "🌍" → byte tokens                          |
 | Metaspace         | SentencePiece-style             | "Hello" → ["▁Hello"]                       |
 | BertPreTokenizer  | BERT-style (CJK aware)          | "世界" → ["世", "界"]                        |
 | Digits            | Handle numbers                  | "123" → ["1", "2", "3"] or ["123"]        |
 ## Models
 Core tokenization algorithms.
 ### BPE Model
 ```python
 from tokenizers.models import BPE
 model = BPE(
    vocab=None,           # Or provide pre-built vocab
    merges=None,          # Or provide merge rules
    unk_token="[UNK]",    # Unknown token
    continuing_subword_prefix="",
    end_of_word_suffix="",
    fuse_unk=False        # Keep unknown tokens separate
 )
 tokenizer = Tokenizer(model)
 ```
 **Parameters**:
 - `vocab`: Dict of token → id
 - `merges`: List of merge rules as token pairs, e.g. `[("a", "b"), ("ab", "c")]`
 - `unk_token`: Token for unknown words
 - `continuing_subword_prefix`: Prefix for subwords (empty for GPT-2)
 - `end_of_word_suffix`: Suffix for last subword (empty for GPT-2)
 ### WordPiece Model
 ```python
 from tokenizers.models import WordPiece
 model = WordPiece(
    vocab=None,
    unk_token="[UNK]",
    max_input_chars_per_word=100,  # Max word length
    continuing_subword_prefix="##"  # BERT-style prefix
 )
 tokenizer = Tokenizer(model)
 ```
 **Key difference**: Uses `##` prefix for continuing subwords.
 ### Unigram Model
 ```python
 from tokenizers.models import Unigram
 model = Unigram(
    vocab=None,  # List of (token, score) tuples
    unk_id=0,    # ID for unknown token
    byte_fallback=False  # Fall back to bytes if no match
 )
 tokenizer = Tokenizer(model)
 ```
 **Probabilistic**: Selects tokenization with highest probability.
 ### WordLevel Model
 ```python
 from tokenizers.models import WordLevel
 # Simple word-to-ID mapping (no subwords)
 model = WordLevel(
    vocab=None,
    unk_token="[UNK]"
 )
 tokenizer = Tokenizer(model)
 ```
 **Warning**: Requires huge vocabulary (one token per word).
 ## Post-processors
 Add special tokens and format output.
 ### Template processing
 **BERT-style** (`[CLS] sentence [SEP]`):
 ```python
 from tokenizers.processors import TemplateProcessing
 tokenizer.post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B [SEP]",
    special_tokens=[
        ("[CLS]", 101),
        ("[SEP]", 102),
    ],
 )
 # Single sentence
 output = tokenizer.encode("Hello world")
 # [101, ..., 102]  ([CLS] hello world [SEP])
 # Sentence pair
 output = tokenizer.encode("Hello", "world")
 # [101, ..., 102, ..., 102]  ([CLS] hello [SEP] world [SEP])
 ```
 **GPT-2 style** (`sentence <|endoftext|>`):
 ```python
 tokenizer.post_processor = TemplateProcessing(
    single="$A <|endoftext|>",
    special_tokens=[
        ("<|endoftext|>", 50256),
    ],
 )
 ```
 **RoBERTa style** (`<s> sentence </s>`):
 ```python
 tokenizer.post_processor = TemplateProcessing(
    single="<s> $A </s>",
    pair="<s> $A </s> </s> $B </s>",
    special_tokens=[
        ("<s>", 0),
        ("</s>", 2),
    ],
 )
 ```
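 **T5 style** (`sentence </s>`):
 ```python
 # T5 adds no CLS/SEP tokens, but it does append the </s> (EOS) token
 tokenizer.post_processor = TemplateProcessing(
    single="$A </s>",
    pair="$A </s> $B </s>",
    special_tokens=[
        ("</s>", 1),
    ],
 )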
 ```
 ### RobertaProcessing
 ```python
 from tokenizers.processors import RobertaProcessing
 tokenizer.post_processor = RobertaProcessing(
    sep=("</s>", 2),
    cls=("<s>", 0),
    add_prefix_space=True,  # Add space before first token
    trim_offsets=True       # Trim leading space from offsets
 )
 ```
 ### ByteLevelProcessing
 ```python
 from tokenizers.processors import ByteLevel as ByteLevelProcessing
 tokenizer.post_processor = ByteLevelProcessing(
    trim_offsets=True  # Remove Ġ from offsets
 )
 ```
 ## Decoders
 Convert token IDs back to text.
 ### ByteLevel decoder
 ```python
 from tokenizers.decoders import ByteLevel
 tokenizer.decoder = ByteLevel()
 # Handles byte-level tokens
 # ["ĠHello", "Ġworld"] → "Hello world"
 ```
 ### WordPiece decoder
 ```python
 from tokenizers.decoders import WordPiece
 tokenizer.decoder = WordPiece(prefix="##")
 # Removes ## prefix and concatenates
 # ["token", "##ization"] → "tokenization"
 ```
 ### Metaspace decoder
 ```python
 from tokenizers.decoders import Metaspace
 tokenizer.decoder = Metaspace(replacement="▁", add_prefix_space=True)
 # Converts ▁ back to spaces
 # ["▁Hello", "▁world"] → "Hello world"
 ```
 ### BPEDecoder
 ```python
 from tokenizers.decoders import BPEDecoder
 tokenizer.decoder = BPEDecoder(suffix="</w>")
 # Removes suffix and concatenates
 # ["token", "ization</w>"] → "tokenization"
 ```
 ### Sequence decoder
 ```python
 from tokenizers.decoders import Sequence, ByteLevel, Strip
 tokenizer.decoder = Sequence([
    ByteLevel(),      # Decode byte-level first
    Strip(' ', 1, 1)  # Strip leading/trailing spaces
 ])
 ```
 ## Complete pipeline examples
 ### BERT tokenizer
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import WordPiece
 from tokenizers.normalizers import BertNormalizer
 from tokenizers.pre_tokenizers import BertPreTokenizer
 from tokenizers.processors import TemplateProcessing
 from tokenizers.decoders import WordPiece as WordPieceDecoder
 # Model
 tokenizer = Tokenizer(WordPiece(unk_token="[UNK]"))
 # Normalization
 tokenizer.normalizer = BertNormalizer(lowercase=True)
 # Pre-tokenization
 tokenizer.pre_tokenizer = BertPreTokenizer()
 # Post-processing
 tokenizer.post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B [SEP]",
    special_tokens=[("[CLS]", 101), ("[SEP]", 102)],
 )
 # Decoder
 tokenizer.decoder = WordPieceDecoder(prefix="##")
 # Enable padding
 tokenizer.enable_padding(pad_id=0, pad_token="[PAD]")
 # Enable truncation
 tokenizer.enable_truncation(max_length=512)
 ```
 ### GPT-2 tokenizer
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.normalizers import NFC
 from tokenizers.pre_tokenizers import ByteLevel
 from tokenizers.decoders import ByteLevel as ByteLevelDecoder
 from tokenizers.processors import TemplateProcessing
 # Model
 tokenizer = Tokenizer(BPE())
 # Normalization (minimal)
 tokenizer.normalizer = NFC()
 # Byte-level pre-tokenization
 tokenizer.pre_tokenizer = ByteLevel(add_prefix_space=False)
 # Post-processing
 tokenizer.post_processor = TemplateProcessing(
    single="$A <|endoftext|>",
    special_tokens=[("<|endoftext|>", 50256)],
 )
 # Byte-level decoder
 tokenizer.decoder = ByteLevelDecoder()
 ```
 ### T5 tokenizer (SentencePiece-style)
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import Unigram
 from tokenizers.normalizers import NFKC
 from tokenizers.pre_tokenizers import Metaspace
 from tokenizers.decoders import Metaspace as MetaspaceDecoder
 # Model
 tokenizer = Tokenizer(Unigram())
 # Normalization
 tokenizer.normalizer = NFKC()
 # Metaspace pre-tokenization
 tokenizer.pre_tokenizer = Metaspace(replacement="▁", add_prefix_space=True)
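 # Post-processing: T5 adds no CLS/SEP, but appends </s> (EOS)
 from tokenizers.processors import TemplateProcessing
 tokenizer.post_processor = TemplateProcessing(
    single="$A </s>",
    pair="$A </s> $B </s>",
    special_tokens=[("</s>", 1)],
 )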
 # Metaspace decoder
 tokenizer.decoder = MetaspaceDecoder(replacement="▁", add_prefix_space=True)
 ```
 ## Alignment tracking
 Track token positions in original text.
 ### Basic alignment
 ```python
 text = "Hello, world!"
 output = tokenizer.encode(text)
 for token, (start, end) in zip(output.tokens, output.offsets):
    print(f"{token:10s} → [{start:2d}, {end:2d}): {text[start:end]!r}")
 # Output:
 # [CLS]      → [ 0,  0): ''
 # hello      → [ 0,  5): 'Hello'
 # ,          → [ 5,  6): ','
 # world      → [ 7, 12): 'world'
 # !          → [12, 13): '!'
 # [SEP]      → [ 0,  0): ''
 ```
 ### Word-level alignment
 ```python
 # Get word_ids (which word each token belongs to)
 encoding = tokenizer.encode("Hello world")
 word_ids = encoding.word_ids
 print(word_ids)
 # [None, 0, 0, 1, None]
 # None = special token, 0 = first word, 1 = second word
 ```
 **Use case**: Token classification (NER)
 ```python
 # Align predictions to words
 predictions = ["O", "B-PER", "I-PER", "O", "O"]
 word_predictions = {}
 for token_idx, word_idx in enumerate(encoding.word_ids):
    if word_idx is not None and word_idx not in word_predictions:
        word_predictions[word_idx] = predictions[token_idx]
 print(word_predictions)
 # {0: "B-PER", 1: "O"}  # First word is PERSON, second is OTHER
 ```
 ### Span alignment
 ```python
 # Find token span for character span
 text = "Machine learning is awesome"
 char_start, char_end = 8, 16  # "learning"
 encoding = tokenizer.encode(text)
 # Find token span
 token_start = encoding.char_to_token(char_start)
 token_end = encoding.char_to_token(char_end - 1) + 1
 print(f"Tokens {token_start}:{token_end} = {encoding.tokens[token_start:token_end]}")
 # Tokens 2:3 = ['learning']
 ```
 **Use case**: Question answering (extract answer span)
 ## Custom components
 ### Custom normalizer
 ```python
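 from tokenizers import NormalizedString
 from tokenizers.normalizers import Normalizer
 class CustomNormalizer:
    def normalize(self, normalized: NormalizedString):
        # Custom normalization logic
        normalized.lowercase()
        normalized.replace("  ", " ")  # Replace double spaces
 # Use custom normalizer (Python objects must be wrapped with Normalizer.custom;
 # custom components cannot be serialized with tokenizer.save)
 tokenizer.normalizer = Normalizer.custom(CustomNormalizer())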
 ```
 ### Custom pre-tokenizer
 ```python
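 from tokenizers import NormalizedString, PreTokenizedString
 from tokenizers.pre_tokenizers import PreTokenizer
 class CustomPreTokenizer:
    def split_on_space(self, i: int, normalized: NormalizedString):
        # The callback receives each piece and must return a list of NormalizedString
        return normalized.split(" ", "removed")
    def pre_tokenize(self, pretok: PreTokenizedString):
        # Custom pre-tokenization logic
        pretok.split(self.split_on_space)
 # Python objects must be wrapped with PreTokenizer.custom
 tokenizer.pre_tokenizer = PreTokenizer.custom(CustomPreTokenizer())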
 ```
 ## Troubleshooting
 ### Issue: Misaligned offsets
 **Symptom**: Offsets don't match original text
 ```python
 text = "  hello"  # Leading spaces
 offsets = [(0, 5)]  # Expects "  hel"
 ```
 **Solution**: Check whether the normalizer strips spaces
 ```python
 # Avoid: stripping whitespace in the normalizer
 tokenizer.normalizer = Sequence([
    Strip(),  # This changes offsets!
 ])
 # Use trim_offsets in post-processor instead
 tokenizer.post_processor = ByteLevelProcessing(trim_offsets=True)
 ```
 ### Issue: Special tokens not added
 **Symptom**: No [CLS] or [SEP] in output
 **Solution**: Check post-processor is set
 ```python
 tokenizer.post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    special_tokens=[("[CLS]", 101), ("[SEP]", 102)],
 )
 ```
 ### Issue: Incorrect decoding
 **Symptom**: Decoded text has ## or ▁
 **Solution**: Set correct decoder
 ```python
 # For WordPiece
 tokenizer.decoder = WordPieceDecoder(prefix="##")
 # For SentencePiece
 tokenizer.decoder = MetaspaceDecoder(replacement="▁")
 ```
 ## Best practices
 1. **Match pipeline to model architecture**:
   - BERT → BertNormalizer + BertPreTokenizer + WordPiece
   - GPT-2 → NFC + ByteLevel + BPE
   - T5 → NFKC + Metaspace + Unigram
 2. **Test pipeline on sample inputs**:
   - Check normalization doesn't over-normalize
   - Verify pre-tokenization splits correctly
   - Ensure decoding reconstructs text
 3. **Preserve alignment for downstream tasks**:
   - Use `trim_offsets` instead of stripping in normalizer
   - Test `char_to_token()` on sample spans
 4. **Document your pipeline**:
   - Save complete tokenizer config
   - Document special tokens
   - Note any custom components
--- a/skills/mlops/huggingface-tokenizers/references/training.md
+++ b/skills/mlops/huggingface-tokenizers/references/training.md
@ -0,0 +1,565 @@
 # Training Custom Tokenizers
 Complete guide to training tokenizers from scratch.
 ## Training workflow
 ### Step 1: Choose tokenization algorithm
 **Decision tree**:
 - **GPT-style model** → BPE
 - **BERT-style model** → WordPiece
 - **Multilingual/No word boundaries** → Unigram
 ### Step 2: Prepare training data
 ```python
 # Option 1: From files
 files = ["train.txt", "validation.txt"]
 # Option 2: From Python list
 texts = [
    "This is the first sentence.",
    "This is the second sentence.",
    # ... more texts
 ]
 # Option 3: From dataset iterator
 from datasets import load_dataset
 dataset = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")
 def batch_iterator(batch_size=1000):
    for i in range(0, len(dataset), batch_size):
        yield dataset[i:i + batch_size]["text"]
 ```
 ### Step 3: Initialize tokenizer
 **BPE example**:
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 from tokenizers.pre_tokenizers import ByteLevel
 from tokenizers.decoders import ByteLevel as ByteLevelDecoder
 tokenizer = Tokenizer(BPE())
 tokenizer.pre_tokenizer = ByteLevel()
 tokenizer.decoder = ByteLevelDecoder()
 trainer = BpeTrainer(
    vocab_size=50000,
    min_frequency=2,
    special_tokens=["<|endoftext|>", "<|padding|>"],
    show_progress=True
 )
 ```
 **WordPiece example**:
 ```python
 from tokenizers.models import WordPiece
 from tokenizers.trainers import WordPieceTrainer
 from tokenizers.normalizers import BertNormalizer
 from tokenizers.pre_tokenizers import BertPreTokenizer
 tokenizer = Tokenizer(WordPiece(unk_token="[UNK]"))
 tokenizer.normalizer = BertNormalizer(lowercase=True)
 tokenizer.pre_tokenizer = BertPreTokenizer()
 trainer = WordPieceTrainer(
    vocab_size=30522,
    min_frequency=2,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"],
    continuing_subword_prefix="##",
    show_progress=True
 )
 ```
 **Unigram example**:
 ```python
 from tokenizers.models import Unigram
 from tokenizers.trainers import UnigramTrainer
 tokenizer = Tokenizer(Unigram())
 trainer = UnigramTrainer(
    vocab_size=8000,
    special_tokens=["<unk>", "<s>", "</s>", "<pad>"],
    unk_token="<unk>",
    show_progress=True
 )
 ```
 ### Step 4: Train
 ```python
 # From files
 tokenizer.train(files=files, trainer=trainer)
 # From iterator (recommended for large datasets)
 tokenizer.train_from_iterator(
    batch_iterator(),
    trainer=trainer,
    length=len(dataset)  # Optional, for progress bar
 )
 ```
 **Training time** (30k vocab on 16-core CPU):
 - 10 MB: 15-30 seconds
 - 100 MB: 1-3 minutes
 - 1 GB: 15-30 minutes
 - 10 GB: 2-4 hours
 ### Step 5: Add post-processing
 ```python
 from tokenizers.processors import TemplateProcessing
 # BERT-style
 tokenizer.post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B [SEP]",
    special_tokens=[
        ("[CLS]", tokenizer.token_to_id("[CLS]")),
        ("[SEP]", tokenizer.token_to_id("[SEP]")),
    ],
 )
 # GPT-2 style
 tokenizer.post_processor = TemplateProcessing(
    single="$A <|endoftext|>",
    special_tokens=[
        ("<|endoftext|>", tokenizer.token_to_id("<|endoftext|>")),
    ],
 )
 ```
 ### Step 6: Save
 ```python
 # Save to JSON
 tokenizer.save("my-tokenizer.json")
 # Save into a directory for transformers (the directory must already exist)
 tokenizer.save("my-tokenizer-dir/tokenizer.json")
 # Convert to transformers format
 from transformers import PreTrainedTokenizerFast
 transformers_tokenizer = PreTrainedTokenizerFast(
    tokenizer_object=tokenizer,
    unk_token="[UNK]",
    pad_token="[PAD]",
    cls_token="[CLS]",
    sep_token="[SEP]",
    mask_token="[MASK]"
 )
 transformers_tokenizer.save_pretrained("my-tokenizer-dir")
 ```
 ## Trainer configuration
 ### BpeTrainer parameters
 ```python
 from tokenizers.trainers import BpeTrainer
 trainer = BpeTrainer(
    vocab_size=30000,              # Target vocabulary size
    min_frequency=2,               # Minimum frequency for merges
    special_tokens=["[UNK]"],      # Special tokens (added first)
    limit_alphabet=1000,           # Limit initial alphabet size
    initial_alphabet=[],           # Pre-defined initial characters
    show_progress=True,            # Show progress bar
    continuing_subword_prefix="",  # Prefix for continuing subwords
    end_of_word_suffix=""          # Suffix for end of words
 )
 ```
 **Parameter tuning**:
 - **vocab_size**: Start with 30k for English, 50k for multilingual
 - **min_frequency**: 2-5 for large corpora, 1 for small
 - **limit_alphabet**: Caps how many distinct characters are kept in the initial alphabet; tune it for scripts with very large character sets (e.g. CJK)
 ### WordPieceTrainer parameters
 ```python
 from tokenizers.trainers import WordPieceTrainer
 trainer = WordPieceTrainer(
    vocab_size=30522,              # BERT uses 30,522
    min_frequency=2,
    special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"],
    limit_alphabet=1000,
    continuing_subword_prefix="##", # BERT-style prefix
    show_progress=True
 )
 ```
 ### UnigramTrainer parameters
 ```python
 from tokenizers.trainers import UnigramTrainer
 trainer = UnigramTrainer(
    vocab_size=8000,               # Typically smaller than BPE/WordPiece
    special_tokens=["<unk>", "<s>", "</s>"],
    unk_token="<unk>",
    max_piece_length=16,           # Maximum token length
    n_sub_iterations=2,            # EM algorithm iterations
    shrinking_factor=0.75,         # Vocabulary reduction rate
    show_progress=True
 )
 ```
 ## Training from large datasets
 ### Memory-efficient training
 ```python
 from datasets import load_dataset
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 # Load dataset
 dataset = load_dataset("wikipedia", "20220301.en", split="train", streaming=True)
 # Create iterator (yields batches)
 def batch_iterator(batch_size=1000):
    batch = []
    for sample in dataset:
        batch.append(sample["text"])
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if batch:
        yield batch
 # Initialize tokenizer
 tokenizer = Tokenizer(BPE())
 trainer = BpeTrainer(vocab_size=50000, special_tokens=["<|endoftext|>"])
 # Train (memory efficient - streams data)
 tokenizer.train_from_iterator(
    batch_iterator(),
    trainer=trainer
 )
 ```
 **Memory usage**: ~200 MB (vs 10+ GB loading full dataset)
 ### Multi-file training
 ```python
 import glob
 # Find all training files
 files = glob.glob("data/train/*.txt")
 print(f"Training on {len(files)} files")
 # Train on all files
 tokenizer.train(files=files, trainer=trainer)
 ```
 ### Parallel training (multi-processing)
 ```python
 from multiprocessing import Pool, cpu_count
 import os
 def train_shard(shard_files):
    """Train tokenizer on a shard of files."""
    tokenizer = Tokenizer(BPE())
    trainer = BpeTrainer(vocab_size=50000)
    tokenizer.train(files=shard_files, trainer=trainer)
    return tokenizer.get_vocab()
 # Split files into shards
 num_shards = cpu_count()
 file_shards = [files[i::num_shards] for i in range(num_shards)]
 # Train shards in parallel
 with Pool(num_shards) as pool:
    vocab_shards = pool.map(train_shard, file_shards)
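 # Merge vocabularies (custom logic needed)
 # NOTE: This is a simplified example. Each shard assigns its own token IDs and
 # learns its own merges, so update() only yields the union of token strings with
 # colliding IDs - not a usable tokenizer. A real implementation must re-assign
 # IDs and reconcile merge rules (or retrain on the combined token counts).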
 final_vocab = {}
 for vocab in vocab_shards:
    final_vocab.update(vocab)
 ```
 ## Domain-specific tokenizers
 ### Code tokenizer
 ```python
 from tokenizers import Tokenizer
 from tokenizers.models import BPE
 from tokenizers.trainers import BpeTrainer
 from tokenizers.pre_tokenizers import ByteLevel
 from tokenizers.normalizers import Sequence, NFC
 # Code-optimized configuration
 tokenizer = Tokenizer(BPE())
 # Minimal normalization (preserve case, whitespace)
 tokenizer.normalizer = NFC()  # Only normalize Unicode
 # Byte-level pre-tokenization (handles all characters)
 tokenizer.pre_tokenizer = ByteLevel()
 # Train on code corpus
 trainer = BpeTrainer(
    vocab_size=50000,
    special_tokens=["<|endoftext|>", "<|pad|>"],
    min_frequency=2
 )
 tokenizer.train(files=["code_corpus.txt"], trainer=trainer)
 ```
 ### Medical/scientific tokenizer
 ```python
 # Preserve case and special characters
 from tokenizers.normalizers import NFKC
 from tokenizers.pre_tokenizers import Whitespace, Punctuation, Sequence
 tokenizer = Tokenizer(BPE())
 # Minimal normalization
 tokenizer.normalizer = NFKC()
 # Preserve medical terms
 tokenizer.pre_tokenizer = Sequence([
    Whitespace(),
    Punctuation(behavior="isolated")  # Keep punctuation separate
 ])
 trainer = BpeTrainer(
    vocab_size=50000,
    special_tokens=["[UNK]", "[CLS]", "[SEP]"],
    min_frequency=3  # Higher threshold: pairs seen fewer than 3 times are not merged
 )
 tokenizer.train(files=["pubmed_corpus.txt"], trainer=trainer)
 ```
 ### Multilingual tokenizer
 ```python
 # Handle multiple scripts
 from tokenizers.normalizers import NFKC, Lowercase, Sequence
 tokenizer = Tokenizer(BPE())
 # Normalize but don't lowercase (preserves script differences)
 tokenizer.normalizer = NFKC()
 # Byte-level handles all Unicode
 from tokenizers.pre_tokenizers import ByteLevel
 tokenizer.pre_tokenizer = ByteLevel()
 trainer = BpeTrainer(
    vocab_size=100000,  # Larger vocab for multiple languages
    special_tokens=["<unk>", "<s>", "</s>"],
    limit_alphabet=None  # No limit (handles all scripts)
 )
 # Train on multilingual corpus
 tokenizer.train(files=["multilingual_corpus.txt"], trainer=trainer)
 ```
 ## Vocabulary size selection
 ### Guidelines by task
 | Task                  | Recommended Vocab Size | Rationale |
 |-----------------------|------------------------|-----------|
 | English (monolingual) | 30,000 - 50,000       | Balanced coverage |
 | Multilingual          | 50,000 - 250,000      | More languages = more tokens |
 | Code                  | 30,000 - 50,000       | Similar to English |
 | Domain-specific       | 10,000 - 30,000       | Smaller, focused vocabulary |
 | Character-level tasks | 1,000 - 5,000         | Only characters + subwords |
 ### Vocabulary size impact
 **Small vocab (10k)**:
 - Pros: Faster training, smaller model, less memory
 - Cons: More tokens per sentence, worse OOV handling
 **Medium vocab (30k-50k)**:
 - Pros: Good balance, standard choice
 - Cons: None (recommended default)
 **Large vocab (100k+)**:
 - Pros: Fewer tokens per sentence, better OOV
 - Cons: Slower training, larger embedding table
 ### Empirical testing
 ```python
 # Train multiple tokenizers with different vocab sizes
 vocab_sizes = [10000, 30000, 50000, 100000]
 for vocab_size in vocab_sizes:
    tokenizer = Tokenizer(BPE())
    trainer = BpeTrainer(vocab_size=vocab_size)
    tokenizer.train(files=["sample.txt"], trainer=trainer)
    # Evaluate on test set
    test_text = "Test sentence for evaluation..."
    tokens = tokenizer.encode(test_text).ids
    print(f"Vocab: {vocab_size:6d} | Tokens: {len(tokens):3d} | Avg: {len(test_text)/len(tokens):.2f} chars/token")
 # Example output (illustrative; test_text is 31 characters):
 # Vocab:  10000 | Tokens:  12 | Avg: 2.58 chars/token
 # Vocab:  30000 | Tokens:   8 | Avg: 3.88 chars/token
 # Vocab:  50000 | Tokens:   7 | Avg: 4.43 chars/token
 # Vocab: 100000 | Tokens:   6 | Avg: 5.17 chars/token
 ```
 ## Testing tokenizer quality
 ### Coverage test
 ```python
 # Test on held-out data
 test_corpus = load_dataset("wikitext", "wikitext-103-raw-v1", split="test")
 total_tokens = 0
 unk_tokens = 0
 unk_id = tokenizer.token_to_id("[UNK]")
 for text in test_corpus["text"]:
    if text.strip():
        encoding = tokenizer.encode(text)
        total_tokens += len(encoding.ids)
        unk_tokens += encoding.ids.count(unk_id)
 unk_rate = unk_tokens / total_tokens
 print(f"Unknown token rate: {unk_rate:.2%}")
 # Good quality: <1% unknown tokens
 # Acceptable: 1-5%
 # Poor: >5%
 ```
 ### Compression test
 ```python
 # Measure tokenization efficiency
 import numpy as np
 token_lengths = []
 for text in test_corpus["text"][:1000]:
    if text.strip():
        encoding = tokenizer.encode(text)
        chars_per_token = len(text) / len(encoding.ids)
        token_lengths.append(chars_per_token)
 avg_chars_per_token = np.mean(token_lengths)
 print(f"Average characters per token: {avg_chars_per_token:.2f}")
 # Good: 4-6 chars/token (English)
 # Acceptable: 3-4 chars/token
 # Poor: <3 chars/token (under-compression)
 ```
 ### Semantic test
 ```python
 # Manually inspect tokenization of common words/phrases
 test_phrases = [
    "tokenization",
    "machine learning",
    "artificial intelligence",
    "preprocessing",
    "hello world"
 ]
 for phrase in test_phrases:
    tokens = tokenizer.encode(phrase).tokens
    print(f"{phrase:25s} → {tokens}")
 # Good tokenization:
 # tokenization              → ['token', 'ization']
 # machine learning          → ['machine', 'learning']
 # artificial intelligence   → ['artificial', 'intelligence']
 ```
 ## Troubleshooting
 ### Issue: Training too slow
 **Solutions**:
 1. Reduce vocabulary size
 2. Increase `min_frequency`
 3. Use `limit_alphabet` to reduce initial alphabet
 4. Train on subset first
 ```python
 # Fast training configuration
 trainer = BpeTrainer(
    vocab_size=20000,      # Smaller vocab
    min_frequency=5,       # Higher threshold
    limit_alphabet=500,    # Limit alphabet
    show_progress=True
 )
 ```
 ### Issue: High unknown token rate
 **Solutions**:
 1. Increase vocabulary size
 2. Decrease `min_frequency`
 3. Check normalization (might be too aggressive)
 ```python
 # Better coverage configuration
 trainer = BpeTrainer(
    vocab_size=50000,      # Larger vocab
    min_frequency=1,       # Lower threshold
 )
 ```
 ### Issue: Poor quality tokenization
 **Solutions**:
 1. Verify normalization matches your use case
 2. Check pre-tokenization splits correctly
 3. Ensure training data is representative
 4. Try different algorithm (BPE vs WordPiece vs Unigram)
 ```python
 # Debug tokenization pipeline
 text = "Sample text to debug"
 # Check normalization
 normalized = tokenizer.normalizer.normalize_str(text)
 print(f"Normalized: {normalized}")
 # Check pre-tokenization
 pre_tokens = tokenizer.pre_tokenizer.pre_tokenize_str(text)
 print(f"Pre-tokens: {pre_tokens}")
 # Check final tokenization
 tokens = tokenizer.encode(text).tokens
 print(f"Tokens: {tokens}")
 ```
 ## Best practices
 1. **Use representative training data** - Match your target domain
 2. **Start with standard configs** - BERT WordPiece or GPT-2 BPE
 3. **Test on held-out data** - Measure unknown token rate
 4. **Iterate on vocabulary size** - Test 30k, 50k, 100k
 5. **Save tokenizer with model** - Ensure reproducibility
 6. **Version your tokenizers** - Track changes for reproducibility
 7. **Document special tokens** - Critical for model training
--- a/skills/mlops/instructor/SKILL.md
+++ b/skills/mlops/instructor/SKILL.md
@ -0,0 +1,740 @@
 ---
 name: instructor
 description: Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Prompt Engineering, Instructor, Structured Output, Pydantic, Data Extraction, JSON Parsing, Type Safety, Validation, Streaming, OpenAI, Anthropic]
 dependencies: [instructor, pydantic, openai, anthropic]
 ---
 # Instructor: Structured LLM Outputs
 ## When to Use This Skill
 Use Instructor when you need to:
 - **Extract structured data** from LLM responses reliably
 - **Validate outputs** against Pydantic schemas automatically
 - **Retry failed extractions** with automatic error handling
 - **Parse complex JSON** with type safety and validation
 - **Stream partial results** for real-time processing
 - **Support multiple LLM providers** with consistent API
 **GitHub Stars**: 15,000+ | **Battle-tested**: 100,000+ developers
 ## Installation
 ```bash
 # Base installation
 pip install instructor
 # With specific providers
 pip install "instructor[anthropic]"  # Anthropic Claude
 pip install "instructor[openai]"     # OpenAI
 pip install "instructor[all]"        # All providers
 ```
 ## Quick Start
 ### Basic Example: Extract User Data
 ```python
 import instructor
 from pydantic import BaseModel
 from anthropic import Anthropic
 # Define output structure
 class User(BaseModel):
    name: str
    age: int
    email: str
 # Create instructor client
 client = instructor.from_anthropic(Anthropic())
 # Extract structured data
 user = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "John Doe is 30 years old. His email is john@example.com"
    }],
    response_model=User
 )
 print(user.name)   # "John Doe"
 print(user.age)    # 30
 print(user.email)  # "john@example.com"
 ```
 ### With OpenAI
 ```python
 from openai import OpenAI
 client = instructor.from_openai(OpenAI())
 user = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=User,
    messages=[{"role": "user", "content": "Extract: Alice, 25, alice@email.com"}]
 )
 ```
 ## Core Concepts
 ### 1. Response Models (Pydantic)
 Response models define the structure and validation rules for LLM outputs.
 #### Basic Model
 ```python
 from pydantic import BaseModel, Field
 class Article(BaseModel):
    title: str = Field(description="Article title")
    author: str = Field(description="Author name")
    word_count: int = Field(description="Number of words", gt=0)
    tags: list[str] = Field(description="List of relevant tags")
 article = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "Analyze this article: [article text]"
    }],
    response_model=Article
 )
 ```
 **Benefits:**
 - Type safety with Python type hints
 - Automatic validation (word_count > 0)
 - Self-documenting with Field descriptions
 - IDE autocomplete support
 #### Nested Models
 ```python
 class Address(BaseModel):
    street: str
    city: str
    country: str
 class Person(BaseModel):
    name: str
    age: int
    address: Address  # Nested model
 person = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "John lives at 123 Main St, Boston, USA"
    }],
    response_model=Person
 )
 print(person.address.city)  # "Boston"
 ```
 #### Optional Fields
 ```python
 from typing import Optional
 class Product(BaseModel):
    name: str
    price: float
    discount: Optional[float] = None  # Optional
    description: str = Field(default="No description")  # Default value
 # LLM doesn't need to provide discount or description
 ```
 #### Enums for Constraints
 ```python
 from enum import Enum
 class Sentiment(str, Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
 class Review(BaseModel):
    text: str
    sentiment: Sentiment  # Only these 3 values allowed
 review = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "This product is amazing!"
    }],
    response_model=Review
 )
 print(review.sentiment)  # Sentiment.POSITIVE
 ```
 ### 2. Validation
 Pydantic validates LLM outputs automatically. If validation fails, Instructor retries.
 #### Built-in Validators
 ```python
 from pydantic import Field, EmailStr, HttpUrl
 class Contact(BaseModel):
    name: str = Field(min_length=2, max_length=100)
    age: int = Field(ge=0, le=120)  # 0 <= age <= 120
    email: EmailStr  # Validates email format (requires `pip install "pydantic[email]"`)
    website: HttpUrl  # Validates URL format
 # If LLM provides invalid data, Instructor retries automatically
 ```
 #### Custom Validators
 ```python
 from pydantic import field_validator
 class Event(BaseModel):
    name: str
    date: str
    attendees: int
    @field_validator('date')
    def validate_date(cls, v):
        """Ensure date is in YYYY-MM-DD format."""
        import re
            if not re.fullmatch(r'\d{4}-\d{2}-\d{2}', v):
            raise ValueError('Date must be YYYY-MM-DD format')
        return v
    @field_validator('attendees')
    def validate_attendees(cls, v):
        """Ensure positive attendees."""
        if v < 1:
            raise ValueError('Must have at least 1 attendee')
        return v
 ```
 #### Model-Level Validation
 ```python
 from pydantic import model_validator
 class DateRange(BaseModel):
    start_date: str
    end_date: str
    @model_validator(mode='after')
    def check_dates(self):
        """Ensure end_date is after start_date."""
        from datetime import datetime
        start = datetime.strptime(self.start_date, '%Y-%m-%d')
        end = datetime.strptime(self.end_date, '%Y-%m-%d')
        if end < start:
            raise ValueError('end_date must be after start_date')
        return self
 ```
 ### 3. Automatic Retrying
 Instructor retries automatically when validation fails, providing error feedback to the LLM.
 ```python
 # Retries up to 3 times if validation fails
 user = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "Extract user from: John, age unknown"
    }],
    response_model=User,
    max_retries=3  # Default is 3
 )
 # If age can't be extracted, Instructor tells the LLM:
 # "Validation error: age - field required"
 # LLM tries again with better extraction
 ```
 **How it works:**
 1. LLM generates output
 2. Pydantic validates
 3. If invalid: Error message sent back to LLM
 4. LLM tries again with error feedback
 5. Repeats up to max_retries
 ### 4. Streaming
 Stream partial results for real-time processing.
 #### Streaming Partial Objects
 ```python
 from instructor import Partial
 class Story(BaseModel):
    title: str
    content: str
    tags: list[str]
 # Stream partial updates as LLM generates
 for partial_story in client.messages.create_partial(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "Write a short sci-fi story"
    }],
    response_model=Story
 ):
    print(f"Title: {partial_story.title}")
    print(f"Content so far: {(partial_story.content or '')[:100]}...")  # Fields are None until streamed
    # Update UI in real-time
 ```
 #### Streaming Iterables
 ```python
 class Task(BaseModel):
    title: str
    priority: str
 # Stream list items as they're generated
 tasks = client.messages.create_iterable(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "Generate 10 project tasks"
    }],
    response_model=Task
 )
 for task in tasks:
    print(f"- {task.title} ({task.priority})")
    # Process each task as it arrives
 ```
 ## Provider Configuration
 ### Anthropic Claude
 ```python
 import instructor
 from anthropic import Anthropic
 client = instructor.from_anthropic(
    Anthropic(api_key="your-api-key")
 )
 # Use with Claude models
 response = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[...],
    response_model=YourModel
 )
 ```
 ### OpenAI
 ```python
 from openai import OpenAI
 client = instructor.from_openai(
    OpenAI(api_key="your-api-key")
 )
 response = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=YourModel,
    messages=[...]
 )
 ```
 ### Local Models (Ollama)
 ```python
 from openai import OpenAI
 # Point to local Ollama server
 client = instructor.from_openai(
    OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama"  # Required but ignored
    ),
    mode=instructor.Mode.JSON
 )
 response = client.chat.completions.create(
    model="llama3.1",
    response_model=YourModel,
    messages=[...]
 )
 ```
 ## Common Patterns
 ### Pattern 1: Data Extraction from Text
 ```python
 class CompanyInfo(BaseModel):
    name: str
    founded_year: int
    industry: str
    employees: int
    headquarters: str
 text = """
 Tesla, Inc. was founded in 2003. It operates in the automotive and energy
 industry with approximately 140,000 employees. The company is headquartered
 in Austin, Texas.
 """
 company = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": f"Extract company information from: {text}"
    }],
    response_model=CompanyInfo
 )
 ```
 ### Pattern 2: Classification
 ```python
 class Category(str, Enum):
    TECHNOLOGY = "technology"
    FINANCE = "finance"
    HEALTHCARE = "healthcare"
    EDUCATION = "education"
    OTHER = "other"
 class ArticleClassification(BaseModel):
    category: Category
    confidence: float = Field(ge=0.0, le=1.0)
    keywords: list[str]
 classification = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "Classify this article: [article text]"
    }],
    response_model=ArticleClassification
 )
 ```
 ### Pattern 3: Multi-Entity Extraction
 ```python
 class Person(BaseModel):
    name: str
    role: str
 class Organization(BaseModel):
    name: str
    industry: str
 class Entities(BaseModel):
    people: list[Person]
    organizations: list[Organization]
    locations: list[str]
 text = "Tim Cook, CEO of Apple, announced at the event in Cupertino..."
 entities = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": f"Extract all entities from: {text}"
    }],
    response_model=Entities
 )
 for person in entities.people:
    print(f"{person.name} - {person.role}")
 ```
 ### Pattern 4: Structured Analysis
 ```python
 class SentimentAnalysis(BaseModel):
    overall_sentiment: Sentiment
    positive_aspects: list[str]
    negative_aspects: list[str]
    suggestions: list[str]
    score: float = Field(ge=-1.0, le=1.0)
 review = "The product works well but setup was confusing..."
 analysis = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": f"Analyze this review: {review}"
    }],
    response_model=SentimentAnalysis
 )
 ```
 ### Pattern 5: Batch Processing
 ```python
 def extract_person(text: str) -> Person:
    return client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[{
            "role": "user",
            "content": f"Extract person from: {text}"
        }],
        response_model=Person
    )
 texts = [
    "John Doe is a 30-year-old engineer",
    "Jane Smith, 25, works in marketing",
    "Bob Johnson, age 40, software developer"
 ]
 people = [extract_person(text) for text in texts]
 ```
 ## Advanced Features
 ### Union Types
 ```python
 from typing import Union
 class TextContent(BaseModel):
    type: str = "text"
    content: str
 class ImageContent(BaseModel):
    type: str = "image"
    url: HttpUrl
    caption: str
 class Post(BaseModel):
    title: str
    content: Union[TextContent, ImageContent]  # Either type
 # LLM chooses appropriate type based on content
 ```
 ### Dynamic Models
 ```python
 from pydantic import create_model
 # Create model at runtime
 DynamicUser = create_model(
    'User',
    name=(str, ...),
    age=(int, Field(ge=0)),
    email=(EmailStr, ...)
 )
 user = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[...],
    response_model=DynamicUser
 )
 ```
 ### Custom Modes
 ```python
 # For providers without native structured outputs
 client = instructor.from_anthropic(
    Anthropic(),
    mode=instructor.Mode.ANTHROPIC_JSON  # JSON mode for Claude
 )
 # Available modes:
 # - Mode.ANTHROPIC_TOOLS (recommended for Claude)
 # - Mode.ANTHROPIC_JSON (JSON fallback for Claude)
 # - Mode.JSON (JSON fallback for OpenAI-compatible clients)
 # - Mode.TOOLS (OpenAI tools)
 ```
 ### Context Management
 ```python
 # Single-use client
 with instructor.from_anthropic(Anthropic()) as client:
    result = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[...],
        response_model=YourModel
    )
    # Client closed automatically
 ```
 ## Error Handling
 ### Handling Validation Errors
 ```python
 from pydantic import ValidationError
 try:
    user = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[...],
        response_model=User,
        max_retries=3
    )
 except ValidationError as e:
    print(f"Failed after retries: {e}")
    # Handle gracefully
 except Exception as e:
    print(f"API error: {e}")
 ```
 ### Custom Error Messages
 ```python
 class ValidatedUser(BaseModel):
    name: str = Field(description="Full name, 2-100 characters")
    age: int = Field(description="Age between 0 and 120", ge=0, le=120)
    email: EmailStr = Field(description="Valid email address")
    class Config:
        # Example payload embedded in the JSON schema to guide the LLM (not an error message)
        json_schema_extra = {
            "examples": [
                {
                    "name": "John Doe",
                    "age": 30,
                    "email": "john@example.com"
                }
            ]
        }
 ```
 ## Best Practices
 ### 1. Clear Field Descriptions
 ```python
 # ❌ Bad: Vague
 class Product(BaseModel):
    name: str
    price: float
 # ✅ Good: Descriptive
 class Product(BaseModel):
    name: str = Field(description="Product name from the text")
    price: float = Field(description="Price in USD, without currency symbol")
 ```
 ### 2. Use Appropriate Validation
 ```python
 # ✅ Good: Constrain values
 class Rating(BaseModel):
    score: int = Field(ge=1, le=5, description="Rating from 1 to 5 stars")
    review: str = Field(min_length=10, description="Review text, at least 10 chars")
 ```
 ### 3. Provide Examples in Prompts
 ```python
 messages = [{
    "role": "user",
    "content": """Extract person info from: "John, 30, engineer"
 Example format:
 {
  "name": "John Doe",
  "age": 30,
  "occupation": "engineer"
 }"""
 }]
 ```
 ### 4. Use Enums for Fixed Categories
 ```python
 # ✅ Good: Enum ensures valid values
 class Status(str, Enum):
    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
 class Application(BaseModel):
    status: Status  # LLM must choose from enum
 ```
 ### 5. Handle Missing Data Gracefully
 ```python
 class PartialData(BaseModel):
    required_field: str
    optional_field: Optional[str] = None
    default_field: str = "default_value"
 # LLM only needs to provide required_field
 ```
 ## Comparison to Alternatives
 | Feature | Instructor | Manual JSON | LangChain | DSPy |
 |---------|------------|-------------|-----------|------|
 | Type Safety | ✅ Yes | ❌ No | ⚠️ Partial | ✅ Yes |
 | Auto Validation | ✅ Yes | ❌ No | ❌ No | ⚠️ Limited |
 | Auto Retry | ✅ Yes | ❌ No | ❌ No | ✅ Yes |
 | Streaming | ✅ Yes | ❌ No | ✅ Yes | ❌ No |
 | Multi-Provider | ✅ Yes | ⚠️ Manual | ✅ Yes | ✅ Yes |
 | Learning Curve | Low | Low | Medium | High |
 **When to choose Instructor:**
 - Need structured, validated outputs
 - Want type safety and IDE support
 - Require automatic retries
 - Building data extraction systems
 **When to choose alternatives:**
 - DSPy: Need prompt optimization
 - LangChain: Building complex chains
 - Manual: Simple, one-off extractions
 ## Resources
 - **Documentation**: https://python.useinstructor.com
 - **GitHub**: https://github.com/jxnl/instructor (15k+ stars)
 - **Cookbook**: https://python.useinstructor.com/examples
 - **Discord**: Community support available
 ## See Also
 - `references/validation.md` - Advanced validation patterns
 - `references/providers.md` - Provider-specific configuration
 - `references/examples.md` - Real-world use cases
--- a/skills/mlops/instructor/references/examples.md
+++ b/skills/mlops/instructor/references/examples.md
@ -0,0 +1,107 @@
 # Real-World Examples
 Practical examples of using Instructor for structured data extraction.
 ## Data Extraction
 ```python
 class CompanyInfo(BaseModel):
    name: str
    founded: int
    industry: str
    employees: int
 text = "Apple was founded in 1976 in the technology industry with 164,000 employees."
 company = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": f"Extract: {text}"}],
    response_model=CompanyInfo
 )
 ```
 ## Classification
 ```python
 class Sentiment(str, Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
 class Review(BaseModel):
    sentiment: Sentiment
    confidence: float = Field(ge=0.0, le=1.0)
 review = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": "This product is amazing!"}],
    response_model=Review
 )
 ```
 ## Multi-Entity Extraction
 ```python
 class Person(BaseModel):
    name: str
    role: str
 class Entities(BaseModel):
    people: list[Person]
    organizations: list[str]
    locations: list[str]
 entities = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Tim Cook, CEO of Apple, spoke in Cupertino..."}],
    response_model=Entities
 )
 ```
 ## Structured Analysis
 ```python
 class Analysis(BaseModel):
    summary: str
    key_points: list[str]
    sentiment: Sentiment
    actionable_items: list[str]
 analysis = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Analyze: [long text]"}],
    response_model=Analysis
 )
 ```
 ## Batch Processing
 ```python
 texts = ["text1", "text2", "text3"]
 results = [
    client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[{"role": "user", "content": text}],
        response_model=YourModel
    )
    for text in texts
 ]
 ```
 ## Streaming
 ```python
 for partial in client.messages.create_partial(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Generate report..."}],
    response_model=Report
 ):
    print(f"Progress: {partial.title}")
    # Update UI in real-time
 ```
--- a/skills/mlops/instructor/references/providers.md
+++ b/skills/mlops/instructor/references/providers.md
@ -0,0 +1,70 @@
 # Provider Configuration
 Guide to using Instructor with different LLM providers.
 ## Anthropic Claude
 ```python
 import instructor
 from anthropic import Anthropic
 # Basic setup
 client = instructor.from_anthropic(Anthropic())
 # With API key
 client = instructor.from_anthropic(
    Anthropic(api_key="your-api-key")
 )
 # Recommended mode
 client = instructor.from_anthropic(
    Anthropic(),
    mode=instructor.Mode.ANTHROPIC_TOOLS
 )
 # Usage
 result = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": "..."}],
    response_model=YourModel
 )
 ```
 ## OpenAI
 ```python
 from openai import OpenAI
 client = instructor.from_openai(OpenAI())
 result = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=YourModel,
    messages=[{"role": "user", "content": "..."}]
 )
 ```
 ## Local Models (Ollama)
 ```python
 client = instructor.from_openai(
    OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama"
    ),
    mode=instructor.Mode.JSON
 )
 result = client.chat.completions.create(
    model="llama3.1",
    response_model=YourModel,
    messages=[...]
 )
 ```
 ## Modes
 - `Mode.ANTHROPIC_TOOLS`: Recommended for Claude
 - `Mode.TOOLS`: OpenAI function calling
 - `Mode.JSON`: Fallback for unsupported providers
--- a/skills/mlops/instructor/references/validation.md
+++ b/skills/mlops/instructor/references/validation.md
@ -0,0 +1,606 @@
 # Advanced Validation Patterns
 Complete guide to validation in Instructor using Pydantic.
 ## Table of Contents
 - Built-in Validators
 - Custom Field Validators
 - Model-Level Validation
 - Complex Validation Patterns
 - Error Handling
 - Validation Best Practices
 - Advanced Techniques
 ## Built-in Validators
 ### Numeric Constraints
 ```python
 from pydantic import BaseModel, Field
 class Product(BaseModel):
    price: float = Field(gt=0, description="Price must be positive")
    discount: float = Field(ge=0, le=100, description="Discount 0-100%")
    quantity: int = Field(ge=1, description="At least 1 item")
    rating: float = Field(ge=0.0, le=5.0, description="Rating 0-5 stars")
 # If LLM provides invalid values, automatic retry with error feedback
 ```
 **Available constraints:**
 - `gt`: Greater than
 - `ge`: Greater than or equal
 - `lt`: Less than
 - `le`: Less than or equal
 - `multiple_of`: Must be multiple of this number
 ### String Constraints
 ```python
 class User(BaseModel):
    username: str = Field(
        min_length=3,
        max_length=20,
        pattern=r'^[a-zA-Z0-9_]+$',
        description="3-20 alphanumeric characters"
    )
    bio: str = Field(max_length=500, description="Bio up to 500 chars")
    status: str = Field(pattern=r'^(active|inactive|pending)$')
 # pattern validates against regex
 ```
 ### Email and URL Validation
 ```python
 from pydantic import EmailStr, HttpUrl, AnyUrl
 class Contact(BaseModel):
    email: EmailStr  # Validates email format
    website: HttpUrl  # Validates HTTP/HTTPS URLs
    portfolio: AnyUrl  # Any valid URL scheme
 contact = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": "Extract: john@example.com, https://example.com"
    }],
    response_model=Contact
 )
 ```
 ### Date and DateTime Validation
 ```python
 from datetime import date, datetime
 from pydantic import Field, field_validator
 class Event(BaseModel):
    event_date: date  # Validates date format
    created_at: datetime  # Validates datetime format
    year: int = Field(ge=1900, le=2100)
    @field_validator('event_date')
    def future_date(cls, v):
        """Ensure event is in the future."""
        if v < date.today():
            raise ValueError('Event must be in the future')
        return v
 ```
 ### List and Dict Validation
 ```python
 class Document(BaseModel):
    tags: list[str] = Field(min_length=1, max_length=10)
    keywords: list[str] = Field(min_length=3, description="At least 3 keywords")
    metadata: dict[str, str] = Field(description="String key-value pairs")
    @field_validator('tags')
    def unique_tags(cls, v):
        """Ensure tags are unique."""
        if len(v) != len(set(v)):
            raise ValueError('Tags must be unique')
        return v
 ```
 ## Custom Field Validators
 ### Basic Field Validator
 ```python
 from pydantic import field_validator
 class Person(BaseModel):
    name: str
    age: int
    @field_validator('name')
    def name_must_not_be_empty(cls, v):
        """Validate name is not empty or just whitespace."""
        if not v or not v.strip():
            raise ValueError('Name cannot be empty')
        return v.strip()
    @field_validator('age')
    def age_must_be_reasonable(cls, v):
        """Validate age is between 0 and 120."""
        if v < 0 or v > 120:
            raise ValueError('Age must be between 0 and 120')
        return v
 ```
 ### Validator with Field Info
 ```python
 from pydantic import ValidationInfo
 class Article(BaseModel):
    title: str
    content: str
    @field_validator('content')
    def content_length(cls, v, info: ValidationInfo):
        """Validate content is longer than title."""
        if 'title' in info.data:
            title_len = len(info.data['title'])
            if len(v) < title_len * 2:
                raise ValueError('Content should be at least 2x title length')
        return v
 ```
 ### Multiple Fields Validation
 ```python
 class TimeRange(BaseModel):
    start_time: str
    end_time: str
    @field_validator('start_time', 'end_time')
    def valid_time_format(cls, v):
        """Validate both times are in HH:MM format."""
        import re
        if not re.match(r'^\d{2}:\d{2}$', v):
            raise ValueError('Time must be in HH:MM format')
        return v
 ```
 ### Transform and Validate
 ```python
 class URL(BaseModel):
    url: str
    @field_validator('url')
    def normalize_url(cls, v):
        """Add https:// if missing."""
        if not v.startswith(('http://', 'https://')):
            v = f'https://{v}'
        return v
 ```
 ## Model-Level Validation
 ### Cross-Field Validation
 ```python
 from pydantic import model_validator
 class DateRange(BaseModel):
    start_date: str
    end_date: str
    @model_validator(mode='after')
    def check_dates(self):
        """Ensure end_date is after start_date."""
        from datetime import datetime
        start = datetime.strptime(self.start_date, '%Y-%m-%d')
        end = datetime.strptime(self.end_date, '%Y-%m-%d')
        if end < start:
            raise ValueError('end_date must be after start_date')
        return self
 class PriceRange(BaseModel):
    min_price: float
    max_price: float
    @model_validator(mode='after')
    def check_price_range(self):
        """Ensure max > min."""
        if self.max_price <= self.min_price:
            raise ValueError('max_price must be greater than min_price')
        return self
 ```
 ### Conditional Validation
 ```python
 from typing import Optional
 class Order(BaseModel):
    order_type: str  # "standard" or "express"
    delivery_date: str
    delivery_time: Optional[str] = None
    @model_validator(mode='after')
    def check_delivery_time(self):
        """Express orders need delivery time."""
        if self.order_type == "express" and not self.delivery_time:
            raise ValueError('Express orders require delivery_time')
        return self
 ```
 ### Complex Business Logic
 ```python
 class Discount(BaseModel):
    code: str
    percentage: float = Field(ge=0, le=100)
    min_purchase: float = Field(ge=0)
    max_discount: float = Field(ge=0)
    @model_validator(mode='after')
    def validate_discount(self):
        """Ensure discount logic is sound."""
        # Max discount can't exceed percentage of min_purchase
        theoretical_max = (self.percentage / 100) * self.min_purchase
        if self.max_discount > theoretical_max:
            self.max_discount = theoretical_max
        return self
 ```
 ## Complex Validation Patterns
 ### Nested Model Validation
 ```python
 class Address(BaseModel):
    street: str
    city: str
    country: str
    postal_code: str
    @field_validator('postal_code')
    def validate_postal_code(cls, v, info: ValidationInfo):
        """Validate postal code format based on country."""
         if 'country' in info.data:
             import re  # imported before branching so every country check can use it
             country = info.data['country']
             if country == "USA":
                 if not re.match(r'^\d{5}(-\d{4})?$', v):
                     raise ValueError('Invalid US postal code')
             elif country == "Canada":
                 if not re.match(r'^[A-Z]\d[A-Z] \d[A-Z]\d$', v):
                     raise ValueError('Invalid Canadian postal code')
        return v
 class Person(BaseModel):
    name: str
    address: Address
 # Nested validation runs automatically
 ```
 ### List of Models
 ```python
 class Task(BaseModel):
    title: str = Field(min_length=1)
    priority: int = Field(ge=1, le=5)
 class Project(BaseModel):
    name: str
    tasks: list[Task] = Field(min_length=1, description="At least 1 task")
    @field_validator('tasks')
    def at_least_one_high_priority(cls, v):
        """Ensure at least one task has priority >= 4."""
        if not any(task.priority >= 4 for task in v):
            raise ValueError('Project needs at least one high-priority task')
        return v
 ```
 ### Union Type Validation
 ```python
 from typing import Union
 class TextBlock(BaseModel):
    type: str = "text"
    content: str = Field(min_length=1)
 class ImageBlock(BaseModel):
    type: str = "image"
    url: HttpUrl
    alt_text: str
 class Page(BaseModel):
    title: str
    blocks: list[Union[TextBlock, ImageBlock]]
    @field_validator('blocks')
    def validate_block_types(cls, v):
        """Ensure first block is TextBlock."""
        if v and not isinstance(v[0], TextBlock):
            raise ValueError('First block must be text')
        return v
 ```
 ### Dependent Fields
 ```python
 class Subscription(BaseModel):
    plan: str  # "free", "pro", "enterprise"
    max_users: int
    features: list[str]
    @model_validator(mode='after')
    def validate_plan_limits(self):
        """Enforce plan-specific limits."""
        limits = {
            "free": {"max_users": 1, "required_features": ["basic"]},
            "pro": {"max_users": 10, "required_features": ["basic", "advanced"]},
            "enterprise": {"max_users": 999, "required_features": ["basic", "advanced", "premium"]}
        }
        if self.plan in limits:
            limit = limits[self.plan]
            if self.max_users > limit["max_users"]:
                raise ValueError(f'{self.plan} plan limited to {limit["max_users"]} users')
            for feature in limit["required_features"]:
                if feature not in self.features:
                    raise ValueError(f'{self.plan} plan requires {feature} feature')
        return self
 ```
 ## Error Handling
 ### Graceful Degradation
 ```python
 class OptionalExtraction(BaseModel):
    # Required fields
    title: str
    # Optional fields with defaults
    author: Optional[str] = None
    date: Optional[str] = None
    tags: list[str] = Field(default_factory=list)
 # LLM can succeed even if it can't extract everything
 ```
 ### Partial Validation
 ```python
 from pydantic import ValidationError
 def extract_with_fallback(text: str):
    """Try full extraction, fall back to partial."""
    try:
        # Try full extraction
        return client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[{"role": "user", "content": text}],
            response_model=FullModel
        )
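     except ValidationError:
         # NOTE: recent Instructor releases may raise
         # instructor.exceptions.InstructorRetryException once retries are
         # exhausted - verify against your installed version and catch it too.
         # Fall back to partial model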
        return client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[{"role": "user", "content": text}],
            response_model=PartialModel
        )
 ```
 ### Validation Error Inspection
 ```python
 from pydantic import ValidationError
 try:
    result = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[...],
        response_model=MyModel,
        max_retries=3
    )
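 except ValidationError as e:
     # NOTE: recent Instructor releases may wrap the final failure in
     # instructor.exceptions.InstructorRetryException - verify against your
     # installed version before relying on e.errors() here.
     # Inspect specific errors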
    for error in e.errors():
        field = error['loc'][0]
        message = error['msg']
        print(f"Field '{field}' failed: {message}")
        # Custom handling per field
        if field == 'email':
            # Handle email validation failure
            pass
 ```
 ### Custom Error Messages
 ```python
 class DetailedModel(BaseModel):
    name: str = Field(
        min_length=2,
        max_length=100,
        description="Name between 2-100 characters"
    )
    age: int = Field(
        ge=0,
        le=120,
        description="Age between 0 and 120 years"
    )
    @field_validator('name')
    def validate_name(cls, v):
        """Provide helpful error message."""
        if not v.strip():
            raise ValueError(
                'Name cannot be empty. '
                'Please provide a valid name from the text.'
            )
        return v
 # When validation fails, LLM sees these helpful messages
 ```
 ## Validation Best Practices
 ### 1. Be Specific
 ```python
 # ❌ Bad: Vague validation
 class Item(BaseModel):
    name: str
 # ✅ Good: Specific constraints
 class Item(BaseModel):
    name: str = Field(
        min_length=1,
        max_length=200,
        description="Item name, 1-200 characters"
    )
 ```
 ### 2. Provide Context
 ```python
 # ✅ Good: Explain why validation failed
 @field_validator('price')
 def validate_price(cls, v):
    if v <= 0:
        raise ValueError(
            'Price must be positive. '
            'Extract numeric price from text without currency symbols.'
        )
    return v
 ```
 ### 3. Use Enums for Fixed Sets
 ```python
 # ❌ Bad: String validation
 status: str
 @field_validator('status')
 def validate_status(cls, v):
    if v not in ['active', 'inactive', 'pending']:
        raise ValueError('Invalid status')
    return v
 # ✅ Good: Enum
 class Status(str, Enum):
    ACTIVE = "active"
    INACTIVE = "inactive"
    PENDING = "pending"
 status: Status  # Validation automatic
 ```
 ### 4. Balance Strictness
 ```python
 # Too strict: May fail unnecessarily
 class StrictModel(BaseModel):
    date: str = Field(pattern=r'^\d{4}-\d{2}-\d{2}$')
    # Fails if LLM uses "2024-1-5" instead of "2024-01-05"
 # Better: Normalize in validator
 class FlexibleModel(BaseModel):
    date: str
    @field_validator('date')
    def normalize_date(cls, v):
        from datetime import datetime
        # Parse flexible formats
        for fmt in ['%Y-%m-%d', '%Y/%m/%d', '%m/%d/%Y']:
            try:
                dt = datetime.strptime(v, fmt)
                return dt.strftime('%Y-%m-%d')  # Normalize
            except ValueError:
                continue
        raise ValueError('Invalid date format')
 ```
 ### 5. Test Validation
 ```python
 # Test your validators with edge cases
 def test_validation():
    # Should succeed
    valid = MyModel(field="valid_value")
    # Should fail
    try:
        invalid = MyModel(field="invalid")
        assert False, "Should have raised ValidationError"
    except ValidationError:
        pass  # Expected
 # Run tests before using in production
 ```
 ## Advanced Techniques
 ### Conditional Required Fields
 ```python
 from typing import Optional
 class ConditionalModel(BaseModel):
    type: str
    detail_a: Optional[str] = None
    detail_b: Optional[str] = None
    @model_validator(mode='after')
    def check_required_details(self):
        """Require different fields based on type."""
        if self.type == "type_a" and not self.detail_a:
            raise ValueError('type_a requires detail_a')
        if self.type == "type_b" and not self.detail_b:
            raise ValueError('type_b requires detail_b')
        return self
 ```
 ### Validation with External Data
 ```python
 class Product(BaseModel):
    sku: str
    name: str
    @field_validator('sku')
    def validate_sku(cls, v):
        """Check SKU exists in database."""
        # Query database or API
        if not database.sku_exists(v):
            raise ValueError(f'SKU {v} not found in catalog')
        return v
 ```
 ### Progressive Validation
 ```python
 # Start with loose validation
 class Stage1(BaseModel):
    data: str  # Any string
 # Then strict validation
 class Stage2(BaseModel):
    data: str = Field(pattern=r'^[A-Z]{3}-\d{6}$')
 # Use Stage1 for initial extraction
 # Use Stage2 for final validation
 ```
 ## Resources
 - **Pydantic Docs**: https://docs.pydantic.dev/latest/concepts/validators/
 - **Instructor Examples**: https://python.useinstructor.com/examples
--- a/skills/mlops/lambda-labs/SKILL.md
+++ b/skills/mlops/lambda-labs/SKILL.md
@ -0,0 +1,545 @@
 ---
 name: lambda-labs-gpu-cloud
 description: Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Infrastructure, GPU Cloud, Training, Inference, Lambda Labs]
 dependencies: [lambda-cloud-client>=1.0.0]
 ---
 # Lambda Labs GPU Cloud
 Comprehensive guide to running ML workloads on Lambda Labs GPU cloud with on-demand instances and 1-Click Clusters.
 ## When to use Lambda Labs
 **Use Lambda Labs when:**
 - Need dedicated GPU instances with full SSH access
 - Running long training jobs (hours to days)
 - Want simple pricing with no egress fees
 - Need persistent storage across sessions
 - Require high-performance multi-node clusters (16-512 GPUs)
 - Want pre-installed ML stack (Lambda Stack with PyTorch, CUDA, NCCL)
 **Key features:**
 - **GPU variety**: B200, H100, GH200, A100, A10, A6000, V100
 - **Lambda Stack**: Pre-installed PyTorch, TensorFlow, CUDA, cuDNN, NCCL
 - **Persistent filesystems**: Keep data across instance restarts
 - **1-Click Clusters**: 16-512 GPU Slurm clusters with InfiniBand
 - **Simple pricing**: Pay-per-minute, no egress fees
 - **Global regions**: 12+ regions worldwide
 **Use alternatives instead:**
 - **Modal**: For serverless, auto-scaling workloads
 - **SkyPilot**: For multi-cloud orchestration and cost optimization
 - **RunPod**: For cheaper spot instances and serverless endpoints
 - **Vast.ai**: For GPU marketplace with lowest prices
 ## Quick start
 ### Account setup
 1. Create account at https://lambda.ai
 2. Add payment method
 3. Generate API key from dashboard
 4. Add SSH key (required before launching instances)
 ### Launch via console
 1. Go to https://cloud.lambda.ai/instances
 2. Click "Launch instance"
 3. Select GPU type and region
 4. Choose SSH key
 5. Optionally attach filesystem
 6. Launch and wait 3-15 minutes
 ### Connect via SSH
 ```bash
 # Get instance IP from console
 ssh ubuntu@<INSTANCE-IP>
 # Or with specific key
 ssh -i ~/.ssh/lambda_key ubuntu@<INSTANCE-IP>
 ```
 ## GPU instances
 ### Available GPUs
 | GPU | VRAM | Price/GPU/hr | Best For |
 |-----|------|--------------|----------|
 | B200 SXM6 | 180 GB | $4.99 | Largest models, fastest training |
 | H100 SXM | 80 GB | $2.99-3.29 | Large model training |
 | H100 PCIe | 80 GB | $2.49 | Cost-effective H100 |
 | GH200 | 96 GB | $1.49 | Single-GPU large models |
 | A100 80GB | 80 GB | $1.79 | Production training |
 | A100 40GB | 40 GB | $1.29 | Standard training |
 | A10 | 24 GB | $0.75 | Inference, fine-tuning |
 | A6000 | 48 GB | $0.80 | Good VRAM/price ratio |
 | V100 | 16 GB | $0.55 | Budget training |
 ### Instance configurations
 ```
 8x GPU: Best for distributed training (DDP, FSDP)
 4x GPU: Large models, multi-GPU training
 2x GPU: Medium workloads
 1x GPU: Fine-tuning, inference, development
 ```
 ### Launch times
 - Single-GPU: 3-5 minutes
 - Multi-GPU: 10-15 minutes
 ## Lambda Stack
 All instances come with Lambda Stack pre-installed:
 ```text
 # Included software
 - Ubuntu 22.04 LTS
 - NVIDIA drivers (latest)
 - CUDA 12.x
 - cuDNN 8.x
 - NCCL (for multi-GPU)
 - PyTorch (latest)
 - TensorFlow (latest)
 - JAX
 - JupyterLab
 ```
 ### Verify installation
 ```bash
 # Check GPU
 nvidia-smi
 # Check PyTorch
 python -c "import torch; print(torch.cuda.is_available())"
 # Check CUDA version
 nvcc --version
 ```
 ## Python API
 ### Installation
 ```bash
 pip install lambda-cloud-client
 ```
 ### Authentication
 ```python
 import os
 import lambda_cloud_client
 # Configure with API key
 configuration = lambda_cloud_client.Configuration(
    host="https://cloud.lambdalabs.com/api/v1",
    access_token=os.environ["LAMBDA_API_KEY"]
 )
 ```
 ### List available instances
 ```python
 with lambda_cloud_client.ApiClient(configuration) as api_client:
    api = lambda_cloud_client.DefaultApi(api_client)
    # Get available instance types
    types = api.instance_types()
    for name, info in types.data.items():
        print(f"{name}: {info.instance_type.description}")
 ```
 ### Launch instance
 ```python
 from lambda_cloud_client.models import LaunchInstanceRequest
 request = LaunchInstanceRequest(
    region_name="us-west-1",
    instance_type_name="gpu_1x_h100_sxm5",
    ssh_key_names=["my-ssh-key"],
    file_system_names=["my-filesystem"],  # Optional
    name="training-job"
 )
 response = api.launch_instance(request)
 instance_id = response.data.instance_ids[0]
 print(f"Launched: {instance_id}")
 ```
 ### List running instances
 ```python
 instances = api.list_instances()
 for instance in instances.data:
    print(f"{instance.name}: {instance.ip} ({instance.status})")
 ```
 ### Terminate instance
 ```python
 from lambda_cloud_client.models import TerminateInstanceRequest
 request = TerminateInstanceRequest(
    instance_ids=[instance_id]
 )
 api.terminate_instance(request)
 ```
 ### SSH key management
 ```python
 from lambda_cloud_client.models import AddSshKeyRequest
 # Add SSH key
 request = AddSshKeyRequest(
    name="my-key",
    public_key="ssh-rsa AAAA..."
 )
 api.add_ssh_key(request)
 # List keys
 keys = api.list_ssh_keys()
 # Delete key
 api.delete_ssh_key(key_id)
 ```
 ## CLI with curl
 ### List instance types
 ```bash
 curl -u $LAMBDA_API_KEY: \
  https://cloud.lambdalabs.com/api/v1/instance-types | jq
 ```
 ### Launch instance
 ```bash
 curl -u $LAMBDA_API_KEY: \
  -X POST https://cloud.lambdalabs.com/api/v1/instance-operations/launch \
  -H "Content-Type: application/json" \
  -d '{
    "region_name": "us-west-1",
    "instance_type_name": "gpu_1x_h100_sxm5",
    "ssh_key_names": ["my-key"]
  }' | jq
 ```
 ### Terminate instance
 ```bash
 curl -u $LAMBDA_API_KEY: \
  -X POST https://cloud.lambdalabs.com/api/v1/instance-operations/terminate \
  -H "Content-Type: application/json" \
  -d '{"instance_ids": ["<INSTANCE-ID>"]}' | jq
 ```
 ## Persistent storage
 ### Filesystems
 Filesystems persist data across instance restarts:
 ```bash
 # Mount location
 /lambda/nfs/<FILESYSTEM_NAME>
 # Example: save checkpoints
 python train.py --checkpoint-dir /lambda/nfs/my-storage/checkpoints
 ```
 ### Create filesystem
 1. Go to Storage in Lambda console
 2. Click "Create filesystem"
 3. Select region (must match instance region)
 4. Name and create
 ### Attach to instance
 Filesystems must be attached at instance launch time:
 - Via console: Select filesystem when launching
 - Via API: Include `file_system_names` in launch request
 ### Best practices
 ```bash
 # Store on filesystem (persists)
 /lambda/nfs/storage/
  ├── datasets/
  ├── checkpoints/
  ├── models/
  └── outputs/
 # Local SSD (faster, ephemeral)
 /home/ubuntu/
  └── working/  # Temporary files
 ```
 ## SSH configuration
 ### Add SSH key
 ```bash
 # Generate key locally
 ssh-keygen -t ed25519 -f ~/.ssh/lambda_key
 # Add public key to Lambda console
 # Or via API
 ```
 ### Multiple keys
 ```bash
 # On instance, add more keys
 echo 'ssh-rsa AAAA...' >> ~/.ssh/authorized_keys
 ```
 ### Import from GitHub
 ```bash
 # On instance
 ssh-import-id gh:username
 ```
 ### SSH tunneling
 ```bash
 # Forward Jupyter
 ssh -L 8888:localhost:8888 ubuntu@<IP>
 # Forward TensorBoard
 ssh -L 6006:localhost:6006 ubuntu@<IP>
 # Multiple ports
 ssh -L 8888:localhost:8888 -L 6006:localhost:6006 ubuntu@<IP>
 ```
 ## JupyterLab
 ### Launch from console
 1. Go to Instances page
 2. Click "Launch" in Cloud IDE column
 3. JupyterLab opens in browser
 ### Manual access
 ```bash
 # On instance
 jupyter lab --ip=0.0.0.0 --port=8888
 # From local machine with tunnel
 ssh -L 8888:localhost:8888 ubuntu@<IP>
 # Open http://localhost:8888
 ```
 ## Training workflows
 ### Single-GPU training
 ```bash
 # SSH to instance
 ssh ubuntu@<IP>
 # Clone repo
 git clone https://github.com/user/project
 cd project
 # Install dependencies
 pip install -r requirements.txt
 # Train
 python train.py --epochs 100 --checkpoint-dir /lambda/nfs/storage/checkpoints
 ```
 ### Multi-GPU training (single node)
 ```python
 # train_ddp.py
 import torch
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 def main():
    dist.init_process_group("nccl")
    rank = dist.get_rank()
    device = rank % torch.cuda.device_count()
    model = MyModel().to(device)
    model = DDP(model, device_ids=[device])
    # Training loop...
 if __name__ == "__main__":
    main()
 ```
 ```bash
 # Launch with torchrun (8 GPUs)
 torchrun --nproc_per_node=8 train_ddp.py
 ```
 ### Checkpoint to filesystem
 ```python
 import os
 checkpoint_dir = "/lambda/nfs/my-storage/checkpoints"
 os.makedirs(checkpoint_dir, exist_ok=True)
 # Save checkpoint
 torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
 }, f"{checkpoint_dir}/checkpoint_{epoch}.pt")
 ```
 ## 1-Click Clusters
 ### Overview
 High-performance Slurm clusters with:
 - 16-512 NVIDIA H100 or B200 GPUs
 - NVIDIA Quantum-2 400 Gb/s InfiniBand
 - GPUDirect RDMA at 3200 Gb/s
 - Pre-installed distributed ML stack
 ### Included software
 - Ubuntu 22.04 LTS + Lambda Stack
 - NCCL, Open MPI
 - PyTorch with DDP and FSDP
 - TensorFlow
 - OFED drivers
 ### Storage
 - 24 TB NVMe per compute node (ephemeral)
 - Lambda filesystems for persistent data
 ### Multi-node training
 ```bash
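 # On Slurm cluster: start ONE torchrun launcher per node; torchrun then
 # spawns the 8 per-GPU workers itself (--nproc_per_node=8)
 srun --nodes=4 --ntasks-per-node=1 --gpus-per-node=8 \
   torchrun --nnodes=4 --nproc_per_node=8 \
   --rdzv_backend=c10d --rdzv_endpoint=$MASTER_ADDR:29500 \
   train.py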
 ```
 ## Networking
 ### Bandwidth
 - Inter-instance (same region): up to 200 Gbps
 - Internet outbound: 20 Gbps max
 ### Firewall
 - Default: Only port 22 (SSH) open
 - Configure additional ports in Lambda console
 - ICMP traffic allowed by default
 ### Private IPs
 ```bash
 # Find private IP
 ip addr show | grep 'inet '
 ```
 ## Common workflows
 ### Workflow 1: Fine-tuning LLM
 ```bash
 # 1. Launch 8x H100 instance with filesystem
 # 2. SSH and setup
 ssh ubuntu@<IP>
 pip install transformers accelerate peft
 # 3. Download model to filesystem
 python -c "
 from transformers import AutoModelForCausalLM
 model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf')
 model.save_pretrained('/lambda/nfs/storage/models/llama-2-7b')
 "
 # 4. Fine-tune with checkpoints on filesystem
 accelerate launch --num_processes 8 train.py \
  --model_path /lambda/nfs/storage/models/llama-2-7b \
  --output_dir /lambda/nfs/storage/outputs \
  --checkpoint_dir /lambda/nfs/storage/checkpoints
 ```
 ### Workflow 2: Batch inference
 ```bash
 # 1. Launch A10 instance (cost-effective for inference)
 # 2. Run inference
 python inference.py \
  --model /lambda/nfs/storage/models/fine-tuned \
  --input /lambda/nfs/storage/data/inputs.jsonl \
  --output /lambda/nfs/storage/data/outputs.jsonl
 ```
 ## Cost optimization
 ### Choose right GPU
 | Task | Recommended GPU |
 |------|-----------------|
 | LLM fine-tuning (7B) | A100 40GB |
 | LLM fine-tuning (70B) | 8x H100 |
 | Inference | A10, A6000 |
 | Development | V100, A10 |
 | Maximum performance | B200 |
 ### Reduce costs
 1. **Use filesystems**: Avoid re-downloading data
 2. **Checkpoint frequently**: Resume interrupted training
 3. **Right-size**: Don't over-provision GPUs
 4. **Terminate idle instances**: There is no auto-stop, so terminate instances manually when they are idle
 ### Monitor usage
 - Dashboard shows real-time GPU utilization
 - API for programmatic monitoring
 ## Common issues
 | Issue | Solution |
 |-------|----------|
 | Instance won't launch | Check region availability, try different GPU |
 | SSH connection refused | Wait for instance to initialize (3-15 min) |
 | Data lost after terminate | Use persistent filesystems |
 | Slow data transfer | Use filesystem in same region |
 | GPU not detected | Reboot instance, check drivers |
 ## References
 - **[Advanced Usage](references/advanced-usage.md)** - Multi-node training, API automation
 - **[Troubleshooting](references/troubleshooting.md)** - Common issues and solutions
 ## Resources
 - **Documentation**: https://docs.lambda.ai
 - **Console**: https://cloud.lambda.ai
 - **Pricing**: https://lambda.ai/instances
 - **Support**: https://support.lambdalabs.com
 - **Blog**: https://lambda.ai/blog
--- a/skills/mlops/lambda-labs/references/advanced-usage.md
+++ b/skills/mlops/lambda-labs/references/advanced-usage.md
@ -0,0 +1,611 @@
 # Lambda Labs Advanced Usage Guide
 ## Multi-Node Distributed Training
 ### PyTorch DDP across nodes
 ```python
 # train_multi_node.py
 import os
 import torch
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 def setup_distributed():
    """Join the NCCL process group and bind this process to its GPU.
    Reads RANK, WORLD_SIZE and LOCAL_RANK from the environment (the
    launcher is expected to export them) and returns them as ints in
    the order (rank, world_size, local_rank).
    """
    env = os.environ
    rank, world_size, local_rank = (
        int(env[name]) for name in ("RANK", "WORLD_SIZE", "LOCAL_RANK")
    )
    dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)
    # Make this process's default CUDA device the one matching its local rank
    torch.cuda.set_device(local_rank)
    return rank, world_size, local_rank
 def main():
    """Train under DDP, writing one checkpoint per epoch from rank 0.
    MyModel, num_epochs, train_one_epoch and dataloader are placeholders
    that this example expects the reader to define.
    """
    rank, world_size, local_rank = setup_distributed()
    ddp_model = DDP(MyModel().cuda(local_rank), device_ids=[local_rank])
    is_primary = rank == 0
    for epoch in range(num_epochs):
        # Training step; the DDP wrapper keeps gradients synchronized
        train_one_epoch(ddp_model, dataloader)
        if not is_primary:
            continue
        # Only rank 0 writes, so ranks never race on the same file;
        # .module is the original model underneath the DDP wrapper
        torch.save(ddp_model.module.state_dict(), f"checkpoint_{epoch}.pt")
    dist.destroy_process_group()
 if __name__ == "__main__":
    main()
 ```
 ### Launch on multiple instances
 ```bash
 # On Node 0 (master)
 export MASTER_ADDR=<NODE0_PRIVATE_IP>
 export MASTER_PORT=29500
 torchrun \
    --nnodes=2 \
    --nproc_per_node=8 \
    --node_rank=0 \
    --master_addr=$MASTER_ADDR \
    --master_port=$MASTER_PORT \
    train_multi_node.py
 # On Node 1
 export MASTER_ADDR=<NODE0_PRIVATE_IP>
 export MASTER_PORT=29500
 torchrun \
    --nnodes=2 \
    --nproc_per_node=8 \
    --node_rank=1 \
    --master_addr=$MASTER_ADDR \
    --master_port=$MASTER_PORT \
    train_multi_node.py
 ```
 ### FSDP for large models
 ```python
 import functools
 import torch
 from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
 from torch.distributed.fsdp import MixedPrecision
 from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
 from transformers.models.llama.modeling_llama import LlamaDecoderLayer
 # `model` and `local_rank` are assumed to exist already (e.g. a loaded
 # Llama model and the LOCAL_RANK value from the distributed setup)
 # Wrap policy for transformer models: shard at decoder-layer granularity
 auto_wrap_policy = functools.partial(
    transformer_auto_wrap_policy,
    transformer_layer_cls={LlamaDecoderLayer}
 )
 model = FSDP(
    model,
    auto_wrap_policy=auto_wrap_policy,
    # Keep parameters, gradient reduction and buffers in bfloat16
    mixed_precision=MixedPrecision(
        param_dtype=torch.bfloat16,
        reduce_dtype=torch.bfloat16,
        buffer_dtype=torch.bfloat16,
    ),
    device_id=local_rank,
 )
 ```
 ### DeepSpeed ZeRO
 `ds_config.json`:
 ```json
 {
    "train_batch_size": 64,
    "gradient_accumulation_steps": 4,
    "fp16": {"enabled": true},
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {"device": "cpu"},
        "offload_param": {"device": "cpu"}
    }
 }
 ```
 ```bash
 # Launch with DeepSpeed
 deepspeed --num_nodes=2 \
    --num_gpus=8 \
    --hostfile=hostfile.txt \
    train.py --deepspeed ds_config.json
 ```
 ### Hostfile for multi-node
 ```bash
 # hostfile.txt
 node0_ip slots=8
 node1_ip slots=8
 ```
 ## API Automation
 ### Auto-launch training jobs
 ```python
 import os
 import time
 import lambda_cloud_client
 from lambda_cloud_client.models import LaunchInstanceRequest
 class LambdaJobManager:
    """Convenience wrapper around lambda_cloud_client for job automation.
    Every method opens a short-lived ApiClient from the stored
    configuration, so no connection is held open between calls.
    """
    def __init__(self, api_key: str):
        """Store the API configuration; api_key is used as the access token."""
        self.config = lambda_cloud_client.Configuration(
            access_token=api_key,
            host="https://cloud.lambdalabs.com/api/v1"
        )
    def find_available_gpu(self, gpu_types: list[str], regions: list[str] = None):
        """Return the first (gpu_type, region_name) pair that has capacity.
        gpu_types is scanned in the order given, so list preferred types
        first. With regions=None any region is accepted. Returns
        (None, None) when nothing matches.
        """
        with lambda_cloud_client.ApiClient(self.config) as client:
            offered = lambda_cloud_client.DefaultApi(client).instance_types()
            for gpu_type in gpu_types:
                if gpu_type not in offered.data:
                    continue
                entry = offered.data[gpu_type]
                for region in entry.regions_with_capacity_available:
                    if regions is not None and region.name not in regions:
                        continue
                    return gpu_type, region.name
        return None, None
    def launch_and_wait(self, instance_type: str, region: str,
                        ssh_key: str, filesystem: str = None,
                        timeout: int = 900) -> dict:
        """Launch one instance and block until it reports "active".
        The status is polled every 30 seconds. Returns a dict with the
        keys "id", "ip" and "status"; raises TimeoutError if the instance
        is still not active once timeout seconds have elapsed.
        """
        with lambda_cloud_client.ApiClient(self.config) as client:
            api = lambda_cloud_client.DefaultApi(client)
            launched = api.launch_instance(
                LaunchInstanceRequest(
                    region_name=region,
                    instance_type_name=instance_type,
                    ssh_key_names=[ssh_key],
                    # An empty list means "attach no filesystem"
                    file_system_names=[filesystem] if filesystem else [],
                )
            )
            instance_id = launched.data.instance_ids[0]
            started_at = time.time()
            while time.time() - started_at < timeout:
                details = api.get_instance(instance_id).data
                if details.status == "active":
                    return {
                        "id": instance_id,
                        "ip": details.ip,
                        "status": "active"
                    }
                time.sleep(30)
            raise TimeoutError(f"Instance {instance_id} not ready after {timeout}s")
    def terminate(self, instance_ids: list[str]):
        """Request termination of every instance id in instance_ids."""
        from lambda_cloud_client.models import TerminateInstanceRequest
        with lambda_cloud_client.ApiClient(self.config) as client:
            lambda_cloud_client.DefaultApi(client).terminate_instance(
                TerminateInstanceRequest(instance_ids=instance_ids)
            )
 # Usage
 manager = LambdaJobManager(os.environ["LAMBDA_API_KEY"])
 # Find available H100 or A100
 gpu_type, region = manager.find_available_gpu(
    ["gpu_8x_h100_sxm5", "gpu_8x_a100_80gb_sxm4"],
    regions=["us-west-1", "us-east-1"]
 )
 if gpu_type:
    instance = manager.launch_and_wait(
        gpu_type, region,
        ssh_key="my-key",
        filesystem="training-data"
    )
    print(f"Ready: ssh ubuntu@{instance['ip']}")
 ```
 ### Batch job submission
 ```python
 import subprocess
 import paramiko
 def run_remote_job(ip: str, ssh_key_path: str, commands: list[str]):
    """Execute commands on remote instance, printing their stdout and stderr.
    Each entry of commands goes through its own exec_command call, i.e. a
    fresh shell session: a `cd` or `export` in one entry does not carry
    over to the next, so chain dependent steps with `&&` in a single entry.
    The connection is closed even if connecting or a command raises.
    """
    import os
    client = paramiko.SSHClient()
    # Accepts unknown host keys without prompting; acceptable for freshly
    # launched throwaway instances, not for long-lived hosts
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(
            ip,
            username="ubuntu",
            # Expand "~" here; the path is otherwise used exactly as given
            key_filename=os.path.expanduser(ssh_key_path),
        )
        for cmd in commands:
            stdin, stdout, stderr = client.exec_command(cmd)
            print(stdout.read().decode())
            # Drain stderr exactly once: a second read() on the same
            # stream returns b"" and the message would be lost
            err_output = stderr.read().decode()
            if err_output:
                print(f"Error: {err_output}")
    finally:
        client.close()
 # Submit training job
 # Every command runs in its own remote shell session, so a standalone
 # `cd` would not affect the commands after it; chain it with && instead
 project_dir = "/lambda/nfs/storage/project"
 commands = [
    f"cd {project_dir} && git pull",
    f"cd {project_dir} && pip install -r requirements.txt",
    f"cd {project_dir} && nohup torchrun --nproc_per_node=8 train.py > train.log 2>&1 &"
 ]
 run_remote_job(instance["ip"], "~/.ssh/lambda_key", commands)
 ```
 ### Monitor training progress
 ```python
 def monitor_job(ip: str, ssh_key_path: str, log_file: str = "train.log"):
    """Stream training logs from remote instance until interrupted (Ctrl+C).
    log_file is interpreted by the remote shell; a relative path is
    presumably resolved against the SSH login directory, so pass an
    absolute path if the job writes its log somewhere else.
    The connection is closed on every exit path.
    """
    import os
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(
            ip,
            username="ubuntu",
            # Expand "~" here; the path is otherwise used exactly as given
            key_filename=os.path.expanduser(ssh_key_path),
        )
        # Tail log file
        stdin, stdout, stderr = client.exec_command(f"tail -f {log_file}")
        try:
            for line in stdout:
                print(line.strip())
        except KeyboardInterrupt:
            # Ctrl+C is the normal way to stop following the log
            pass
    finally:
        client.close()
 ```
 ## 1-Click Cluster Workflows
 ### Slurm job submission
 ```bash
 #!/bin/bash
 #SBATCH --job-name=llm-training
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=1
 #SBATCH --gpus-per-node=8
 #SBATCH --time=24:00:00
 #SBATCH --output=logs/%j.out
 #SBATCH --error=logs/%j.err
 # One Slurm task per node: srun starts a single torchrun on each node and
 # torchrun itself spawns one worker per GPU (--nproc_per_node)
 # Set up distributed environment
 export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
 export MASTER_PORT=29500
 # Launch training
 srun torchrun \
    --nnodes=$SLURM_NNODES \
    --nproc_per_node=$SLURM_GPUS_PER_NODE \
    --rdzv_id=$SLURM_JOB_ID \
    --rdzv_backend=c10d \
    --rdzv_endpoint=$MASTER_ADDR:$MASTER_PORT \
    train.py \
    --config config.yaml
 ```
 ### Interactive cluster session
 ```bash
 # Request interactive session
 srun --nodes=1 --ntasks=1 --gpus=8 --time=4:00:00 --pty bash
 # Now on compute node with 8 GPUs
 nvidia-smi
 python train.py
 ```
 ### Monitoring cluster jobs
 ```bash
 # View job queue
 squeue
 # View job details
 scontrol show job <JOB_ID>
 # Cancel job
 scancel <JOB_ID>
 # View node status
 sinfo
 # View GPU usage across cluster
 srun --nodes=4 nvidia-smi --query-gpu=name,utilization.gpu --format=csv
 ```
 ## Advanced Filesystem Usage
 ### Data staging workflow
 ```bash
 # Stage data from S3 to filesystem (one-time)
 aws s3 sync s3://my-bucket/dataset /lambda/nfs/storage/datasets/
 # Or use rclone
 rclone sync s3:my-bucket/dataset /lambda/nfs/storage/datasets/
 ```
 ### Shared filesystem across instances
 ```python
 # Instance 1: Write checkpoints
 checkpoint_path = "/lambda/nfs/shared/checkpoints/model_step_1000.pt"
 torch.save(model.state_dict(), checkpoint_path)
 # Instance 2: Read checkpoints
 model.load_state_dict(torch.load(checkpoint_path))
 ```
 ### Filesystem best practices
 ```bash
 # Organize for ML workflows
 /lambda/nfs/storage/
 ├── datasets/
 │   ├── raw/           # Original data
 │   └── processed/     # Preprocessed data
 ├── models/
 │   ├── pretrained/    # Base models
 │   └── fine-tuned/    # Your trained models
 ├── checkpoints/
 │   └── experiment_1/  # Per-experiment checkpoints
 ├── logs/
 │   └── tensorboard/   # Training logs
 └── outputs/
    └── inference/     # Inference results
 ```
 ## Environment Management
 ### Custom Python environments
 ```bash
 # Don't modify system Python, create venv
 python -m venv ~/myenv
 source ~/myenv/bin/activate
 # Install packages
 pip install torch transformers accelerate
 # Save to filesystem for reuse
 cp -r ~/myenv /lambda/nfs/storage/envs/myenv
 ```
 ### Conda environments
 ```bash
 # Install miniconda (if not present)
 wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
 bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/miniconda3
 # Create environment
 ~/miniconda3/bin/conda create -n ml python=3.10 pytorch pytorch-cuda=12.1 -c pytorch -c nvidia -y
 # Activate
 source ~/miniconda3/bin/activate ml
 ```
 ### Docker containers
 ```bash
 # Pull and run NVIDIA container
 docker run --gpus all -it --rm \
    -v /lambda/nfs/storage:/data \
    nvcr.io/nvidia/pytorch:24.01-py3
 # Run training in container
 docker run --gpus all -d \
    -v /lambda/nfs/storage:/data \
    -v $(pwd):/workspace \
    nvcr.io/nvidia/pytorch:24.01-py3 \
    python /workspace/train.py
 ```
 ## Monitoring and Observability
 ### GPU monitoring
 ```bash
 # Real-time GPU stats
 watch -n 1 nvidia-smi
 # GPU utilization over time
 nvidia-smi dmon -s u -d 1
 # Detailed GPU info
 nvidia-smi -q
 ```
 ### System monitoring
 ```bash
 # CPU and memory
 htop
 # Disk I/O
 iostat -x 1
 # Network
 iftop
 # All resources
 glances
 ```
 ### TensorBoard integration
 ```bash
 # Start TensorBoard
 tensorboard --logdir /lambda/nfs/storage/logs --port 6006 --bind_all
 # SSH tunnel from local machine
 ssh -L 6006:localhost:6006 ubuntu@<IP>
 # Access at http://localhost:6006
 ```
 ### Weights & Biases integration
 ```python
 import os
 import wandb
 # Initialize with API key (read from the environment, never hardcoded)
 wandb.login(key=os.environ["WANDB_API_KEY"])
 # Start run
 wandb.init(
    project="lambda-training",
    config={"learning_rate": 1e-4, "epochs": 100}
 )
 # Log metrics (`loss` and `acc` come from your training loop)
 wandb.log({"loss": loss, "accuracy": acc})
 # Save artifacts to filesystem + W&B
 wandb.save("/lambda/nfs/storage/checkpoints/best_model.pt")
 ```
 ## Cost Optimization Strategies
 ### Checkpointing for interruption recovery
 ```python
 import os
 def save_checkpoint(model, optimizer, epoch, loss, path):
    """Write a resumable training snapshot to path.
    The file holds the model and optimizer state dicts together with
    the epoch and loss values passed in.
    """
    snapshot = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=loss,
    )
    torch.save(snapshot, path)
 def load_checkpoint(path, model, optimizer):
    """Restore model and optimizer in place from path, if that file exists.
    Returns the stored (epoch, loss). When there is no file at path,
    nothing is loaded and (0, inf) is returned.
    """
    if not os.path.exists(path):
        return 0, float('inf')
    saved = torch.load(path)
    model.load_state_dict(saved['model_state_dict'])
    optimizer.load_state_dict(saved['optimizer_state_dict'])
    return saved['epoch'], saved['loss']
 # Save every N steps to filesystem
 checkpoint_path = "/lambda/nfs/storage/checkpoints/latest.pt"
 if step % 1000 == 0:
    save_checkpoint(model, optimizer, epoch, loss, checkpoint_path)
 ```
 ### Instance selection by workload
 ```python
 def recommend_instance(model_params: int, batch_size: int, task: str) -> str:
    """Recommend Lambda instance based on workload.
    Picks by task and parameter count: below 7e9, below 13e9, or larger.
    "pretraining" always gets the 8x H100 node and any unrecognised task
    falls back to a single A100. batch_size is accepted but not used.
    """
    # Per task: instance for (small, medium, large) models
    by_size = {
        "inference": (
            "gpu_1x_a10",  # $0.75/hr
            "gpu_1x_a6000",  # $0.80/hr
            "gpu_1x_h100_pcie",  # $2.49/hr
        ),
        "fine-tuning": (
            "gpu_1x_a100",  # $1.29/hr
            "gpu_4x_a100",  # $5.16/hr
            "gpu_8x_h100_sxm5",  # $23.92/hr
        ),
    }
    if task == "pretraining":
        return "gpu_8x_h100_sxm5"  # Maximum performance
    if task not in by_size:
        return "gpu_1x_a100"  # Default
    small, medium, large = by_size[task]
    if model_params < 7e9:
        return small
    if model_params < 13e9:
        return medium
    return large
 ```
 ### Auto-terminate idle instances
 ```python
 import time
 from datetime import datetime, timedelta
 def auto_terminate_idle(api_key: str, idle_threshold_hours: float = 2):
    """Terminate instances launched more than idle_threshold_hours ago.
    NOTE(review): time since launch is only a stand-in for idleness.
    Nothing here measures activity, so a busy long-running job is
    terminated as well; track real activity separately before relying
    on this.
    """
    manager = LambdaJobManager(api_key)
    with lambda_cloud_client.ApiClient(manager.config) as client:
        running = lambda_cloud_client.DefaultApi(client).list_instances()
        for instance in running.data:
            # NOTE(review): assumes launched_at is a datetime that can be
            # subtracted from the naive datetime.now() - confirm against
            # the client library
            launched = instance.launched_at
            age = datetime.now() - launched
            if age > timedelta(hours=idle_threshold_hours):
                print(f"Terminating idle instance: {instance.id}")
                manager.terminate([instance.id])
 ```
 ## Security Best Practices
 ### SSH key rotation
 ```bash
 # Generate new key pair
 ssh-keygen -t ed25519 -f ~/.ssh/lambda_key_new -C "lambda-$(date +%Y%m)"
 # Add new key via Lambda console or API
 # Update authorized_keys on running instances
 ssh ubuntu@<IP> "echo '$(cat ~/.ssh/lambda_key_new.pub)' >> ~/.ssh/authorized_keys"
 # Test new key
 ssh -i ~/.ssh/lambda_key_new ubuntu@<IP>
 # Remove old key from Lambda console
 ```
 ### Firewall configuration
 ```bash
 # Lambda console: Only open necessary ports
 # Recommended:
 # - 22 (SSH) - Always needed
 # - 6006 (TensorBoard) - If using
 # - 8888 (Jupyter) - If using
 # - 29500 (PyTorch distributed) - For multi-node only
 ```
 ### Secrets management
 ```bash
 # Don't hardcode API keys in code
 # Use environment variables
 export HF_TOKEN="hf_..."
 export WANDB_API_KEY="..."
 # Or use .env file (add to .gitignore)
 source .env
 # On instance, store in ~/.bashrc
 echo 'export HF_TOKEN="..."' >> ~/.bashrc
 ```
--- a/skills/mlops/lambda-labs/references/troubleshooting.md
+++ b/skills/mlops/lambda-labs/references/troubleshooting.md
@ -0,0 +1,530 @@
 # Lambda Labs Troubleshooting Guide
 ## Instance Launch Issues
 ### No instances available
 **Error**: "No capacity available" or instance type not listed
 **Solutions**:
 ```bash
 # Check availability via API
 curl -u $LAMBDA_API_KEY: \
  https://cloud.lambdalabs.com/api/v1/instance-types | jq '.data | to_entries[] | select(.value.regions_with_capacity_available | length > 0) | .key'
 # Try different regions
 # US regions: us-west-1, us-east-1, us-south-1
 # International: eu-west-1, asia-northeast-1, etc.
 # Try alternative GPU types
 # H100 not available? Try A100
 # A100 not available? Try A10 or A6000
 ```
 ### Instance stuck launching
 **Problem**: Instance shows "booting" for over 20 minutes
 **Solutions**:
 ```bash
 # Single-GPU: Should be ready in 3-5 minutes
 # Multi-GPU (8x): May take 10-15 minutes
 # If stuck longer:
 # 1. Terminate the instance
 # 2. Try a different region
 # 3. Try a different instance type
 # 4. Contact Lambda support if persistent
 ```
 ### API authentication fails
 **Error**: `401 Unauthorized` or `403 Forbidden`
 **Solutions**:
 ```bash
 # Verify API key format (should start with specific prefix)
 echo $LAMBDA_API_KEY
 # Test API key
 curl -u $LAMBDA_API_KEY: \
  https://cloud.lambdalabs.com/api/v1/instance-types
 # Generate new API key from Lambda console if needed
 # Settings > API keys > Generate
 ```
 ### Quota limits reached
 **Error**: "Instance limit reached" or "Quota exceeded"
 **Solutions**:
 - Check current running instances in console
 - Terminate unused instances
 - Contact Lambda support to request quota increase
 - Use 1-Click Clusters for large-scale needs
 ## SSH Connection Issues
 ### Connection refused
 **Error**: `ssh: connect to host <IP> port 22: Connection refused`
 **Solutions**:
 ```bash
 # Wait for instance to fully initialize
 # Single-GPU: 3-5 minutes
 # Multi-GPU: 10-15 minutes
 # Check instance status in console (should be "active")
 # Verify correct IP address
 curl -u $LAMBDA_API_KEY: \
  https://cloud.lambdalabs.com/api/v1/instances | jq '.data[].ip'
 ```
 ### Permission denied
 **Error**: `Permission denied (publickey)`
 **Solutions**:
 ```bash
 # Verify SSH key matches
 ssh -v -i ~/.ssh/lambda_key ubuntu@<IP>
 # Check key permissions
 chmod 600 ~/.ssh/lambda_key
 chmod 644 ~/.ssh/lambda_key.pub
 # Verify key was added to Lambda console before launch
 # Keys must be added BEFORE launching instance
 # Check authorized_keys on instance (if you have another way in)
 cat ~/.ssh/authorized_keys
 ```
 ### Host key verification failed
 **Error**: `WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!`
 **Solutions**:
 ```bash
 # This happens when IP is reused by different instance
 # Remove old key
 ssh-keygen -R <IP>
 # Then connect again
 ssh ubuntu@<IP>
 ```
 ### Timeout during SSH
 **Error**: `ssh: connect to host <IP> port 22: Operation timed out`
 **Solutions**:
 ```bash
 # Check if instance is in "active" state
 # Verify firewall allows SSH (port 22)
 # Lambda console > Firewall
 # Check your local network allows outbound SSH
 # Try from different network/VPN
 ```
 ## GPU Issues
 ### GPU not detected
 **Error**: `nvidia-smi: command not found` or no GPUs shown
 **Solutions**:
 ```bash
 # Reboot instance
 sudo reboot
 # Reinstall NVIDIA drivers (if needed)
 wget -nv -O- https://lambdalabs.com/install-lambda-stack.sh | sh -
 sudo reboot
 # Check driver status
 nvidia-smi
 lsmod | grep nvidia
 ```
 ### CUDA out of memory
 **Error**: `torch.cuda.OutOfMemoryError: CUDA out of memory`
 **Solutions**:
 ```python
 # Check GPU memory
 import torch
 print(torch.cuda.get_device_properties(0).total_memory / 1e9, "GB")
 # Clear cache
 torch.cuda.empty_cache()
 # Reduce batch size
 batch_size = batch_size // 2
 # Enable gradient checkpointing
 model.gradient_checkpointing_enable()
 # Use mixed precision
 from torch.cuda.amp import autocast
 with autocast():
    outputs = model(**inputs)
 # Use larger GPU instance
 # A100-40GB → A100-80GB → H100
 ```
 ### CUDA version mismatch
 **Error**: `CUDA driver version is insufficient for CUDA runtime version`
 **Solutions**:
 ```bash
 # Check versions
 nvidia-smi  # Shows driver CUDA version
 nvcc --version  # Shows toolkit version
 # Lambda Stack should have compatible versions
 # If mismatch, reinstall Lambda Stack
 wget -nv -O- https://lambdalabs.com/install-lambda-stack.sh | sh -
 sudo reboot
 # Or install specific PyTorch version
 pip install torch==2.1.0+cu121 -f https://download.pytorch.org/whl/torch_stable.html
 ```
 ### Multi-GPU not working
 **Error**: Only one GPU being used
 **Solutions**:
 ```python
 # Check all GPUs visible
 import torch
 print(f"GPUs available: {torch.cuda.device_count()}")
 # Verify CUDA_VISIBLE_DEVICES not set restrictively
 import os
 print(os.environ.get("CUDA_VISIBLE_DEVICES", "not set"))
 # Use DataParallel or DistributedDataParallel
 model = torch.nn.DataParallel(model)
 # or
 model = torch.nn.parallel.DistributedDataParallel(model)
 ```
 ## Filesystem Issues
 ### Filesystem not mounted
 **Error**: `/lambda/nfs/<name>` doesn't exist
 **Solutions**:
 ```bash
 # Filesystem must be attached at launch time
 # Cannot attach to running instance
 # Verify filesystem was selected during launch
 # Check mount points
 df -h | grep lambda
 # If missing, terminate and relaunch with filesystem
 ```
 ### Slow filesystem performance
 **Problem**: Reading/writing to filesystem is slow
 **Solutions**:
 ```bash
 # Use local SSD for temporary/intermediate files
 # /home/ubuntu has fast NVMe storage
 # Copy frequently accessed data to local storage
 cp -r /lambda/nfs/storage/dataset /home/ubuntu/dataset
 # Use filesystem for checkpoints and final outputs only
 # Check network bandwidth
 iperf3 -c <filesystem_server>
 ```
 ### Data lost after termination
 **Problem**: Files disappeared after instance terminated
 **Solutions**:
 ```bash
 # Root volume (/home/ubuntu) is EPHEMERAL
 # Data there is lost on termination
 # ALWAYS use filesystem for persistent data
 /lambda/nfs/<filesystem_name>/
 # Sync important local files before terminating
 rsync -av /home/ubuntu/outputs/ /lambda/nfs/storage/outputs/
 ```
 ### Filesystem full
 **Error**: `No space left on device`
 **Solutions**:
 ```bash
 # Check filesystem usage
 df -h /lambda/nfs/storage
 # Find large files
 du -sh /lambda/nfs/storage/* | sort -h
 # Clean up old checkpoints
 find /lambda/nfs/storage/checkpoints -mtime +7 -delete
 # Increase filesystem size in Lambda console
 # (may require support request)
 ```
 ## Network Issues
 ### Port not accessible
 **Error**: Cannot connect to service (TensorBoard, Jupyter, etc.)
 **Solutions**:
 ```bash
 # Lambda default: Only port 22 is open
 # Configure firewall in Lambda console
 # Or use SSH tunneling (recommended)
 ssh -L 6006:localhost:6006 ubuntu@<IP>
 # Access at http://localhost:6006
 # For Jupyter
 ssh -L 8888:localhost:8888 ubuntu@<IP>
 ```
 ### Slow data download
 **Problem**: Downloading datasets is slow
 **Solutions**:
 ```bash
 # Check available bandwidth
 speedtest-cli
 # Use multi-threaded download
 aria2c -x 16 <URL>
 # For HuggingFace models
 export HF_HUB_ENABLE_HF_TRANSFER=1
 pip install hf_transfer
 # For S3, use parallel transfer: raise the CLI's concurrent request
 # limit (default 10) before syncing; --quiet only suppresses output
 aws configure set default.s3.max_concurrent_requests 32
 aws s3 sync s3://bucket/data /local/data --quiet
 ```
 ### Inter-node communication fails
 **Error**: Distributed training can't connect between nodes
 **Solutions**:
 ```bash
 # Verify nodes in same region (required)
 # Check private IPs can communicate
 ping <other_node_private_ip>
 # Verify NCCL settings
 export NCCL_DEBUG=INFO
 export NCCL_IB_DISABLE=0  # Enable InfiniBand if available
 # Check firewall allows distributed ports
 # Need: 29500 (PyTorch), or configured MASTER_PORT
 ```
 ## Software Issues
 ### Package installation fails
 **Error**: `pip install` errors
 **Solutions**:
 ```bash
 # Use virtual environment (don't modify system Python)
 python -m venv ~/myenv
 source ~/myenv/bin/activate
 pip install <package>
 # For CUDA packages, match CUDA version
 pip install torch --index-url https://download.pytorch.org/whl/cu121
 # Clear pip cache if corrupted
 pip cache purge
 ```
 ### Python version issues
 **Error**: Package requires different Python version
 **Solutions**:
 ```bash
 # Install alternate Python (don't replace system Python)
 sudo apt install python3.11 python3.11-venv python3.11-dev
 # Create venv with specific Python
 python3.11 -m venv ~/py311env
 source ~/py311env/bin/activate
 ```
 ### ImportError or ModuleNotFoundError
 **Error**: Module not found despite installation
 **Solutions**:
 ```bash
 # Verify correct Python environment
 which python
 pip list | grep <module>
 # Ensure virtual environment is activated
 source ~/myenv/bin/activate
 # Reinstall in correct environment
 pip uninstall <package>
 pip install <package>
 ```
 ## Training Issues
 ### Training hangs
 **Problem**: Training stops progressing, no output
 **Solutions**:
 ```bash
 # Check GPU utilization
 watch -n 1 nvidia-smi
 # If GPUs at 0%, likely data loading bottleneck
 # Increase num_workers in DataLoader
 # Check for deadlocks in distributed training
 export NCCL_DEBUG=INFO
 # Add timeouts (this goes in your Python training script, not the shell):
 #   dist.init_process_group(..., timeout=timedelta(minutes=30))
 ```
 ### Checkpoint corruption
 **Error**: `RuntimeError: storage has wrong size` or similar
 **Solutions**:
 ```python
 import os
 import torch
 # Use safe saving pattern (`state_dict` is whatever you want to persist)
 checkpoint_path = "/lambda/nfs/storage/checkpoint.pt"
 temp_path = checkpoint_path + ".tmp"
 backup_path = checkpoint_path + ".backup"
 # Save to temp first, so a crash mid-write never damages the real file
 torch.save(state_dict, temp_path)
 # Keep the previous good checkpoint as the fallback copy
 if os.path.exists(checkpoint_path):
    os.replace(checkpoint_path, backup_path)
 # Then atomic rename (os.replace also overwrites an existing target)
 os.replace(temp_path, checkpoint_path)
 # For loading corrupted checkpoint
 try:
    state = torch.load(checkpoint_path)
 except Exception:
    # Fall back to previous checkpoint; Ctrl+C is deliberately not caught
    state = torch.load(backup_path)
 ```
 ### Memory leak
 **Problem**: Memory usage grows over time
 **Solutions**:
 ```python
 # Clear CUDA cache periodically
 torch.cuda.empty_cache()
 # Detach tensors when logging
 loss_value = loss.detach().cpu().item()
 # Don't accumulate gradients unintentionally
 optimizer.zero_grad(set_to_none=True)
 # Use gradient accumulation properly
 if (step + 1) % accumulation_steps == 0:
    optimizer.step()
    optimizer.zero_grad()
 ```
 ## Billing Issues
 ### Unexpected charges
 **Problem**: Bill higher than expected
 **Solutions**:
 ```bash
 # Check for forgotten running instances
 curl -u $LAMBDA_API_KEY: \
  https://cloud.lambdalabs.com/api/v1/instances | jq '.data[].id'
 # Terminate all instances
 # Lambda console > Instances > Terminate all
 # Lambda charges by the minute
 # There is no "stop" feature: an instance keeps billing until it is terminated
 ```
 ### Instance terminated unexpectedly
 **Problem**: Instance disappeared without manual termination
 **Possible causes**:
 - Payment issue (card declined)
 - Account suspension
 - Instance health check failure
 **Solutions**:
 - Check email for Lambda notifications
 - Verify payment method in console
 - Contact Lambda support
 - Always checkpoint to filesystem
 ## Common Error Messages
 | Error | Cause | Solution |
 |-------|-------|----------|
 | `No capacity available` | Region/GPU sold out | Try different region or GPU type |
 | `Permission denied (publickey)` | SSH key mismatch | Re-add key, check permissions |
 | `CUDA out of memory` | Model too large | Reduce batch size, use larger GPU |
 | `No space left on device` | Disk full | Clean up or use filesystem |
 | `Connection refused` | Instance not ready | Wait 3-15 minutes for boot |
 | `Module not found` | Wrong Python env | Activate correct virtualenv |
 ## Getting Help
 1. **Documentation**: https://docs.lambda.ai
 2. **Support**: https://support.lambdalabs.com
 3. **Email**: support@lambdalabs.com
 4. **Status**: Check Lambda status page for outages
 ### Information to Include
 When contacting support, include:
 - Instance ID
 - Region
 - Instance type
 - Error message (full traceback)
 - Steps to reproduce
 - Time of occurrence
--- a/skills/mlops/llama-cpp/SKILL.md
+++ b/skills/mlops/llama-cpp/SKILL.md
@ -0,0 +1,258 @@
 ---
 name: llama-cpp
 description: Runs LLM inference on CPU, Apple Silicon, and consumer GPUs without NVIDIA hardware. Use for edge deployment, M1/M2/M3 Macs, AMD/Intel GPUs, or when CUDA is unavailable. Supports GGUF quantization (1.5-8 bit) for reduced memory and 4-10× speedup vs PyTorch on CPU.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Inference Serving, Llama.cpp, CPU Inference, Apple Silicon, Edge Deployment, GGUF, Quantization, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded]
 dependencies: [llama-cpp-python]
 ---
 # llama.cpp
 Pure C/C++ LLM inference with minimal dependencies, optimized for CPUs and non-NVIDIA hardware.
 ## When to use llama.cpp
 **Use llama.cpp when:**
 - Running on CPU-only machines
 - Deploying on Apple Silicon (M1/M2/M3/M4)
 - Using AMD or Intel GPUs (no CUDA)
 - Edge deployment (Raspberry Pi, embedded systems)
 - Need simple deployment without Docker/Python
 **Use TensorRT-LLM instead when:**
 - Have NVIDIA GPUs (A100/H100)
 - Need maximum throughput (100K+ tok/s)
 - Running in datacenter with CUDA
 **Use vLLM instead when:**
 - Have NVIDIA GPUs
 - Need Python-first API
 - Want PagedAttention
 ## Quick start
 ### Installation
 ```bash
 # macOS/Linux
 brew install llama.cpp
 # Or build from source
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp
 make
 # With Metal (Apple Silicon)
 make LLAMA_METAL=1
 # With CUDA (NVIDIA)
 make LLAMA_CUDA=1
 # With ROCm (AMD)
 make LLAMA_HIP=1
 ```
 ### Download model
 ```bash
 # Download from HuggingFace (GGUF format)
 huggingface-cli download \
    TheBloke/Llama-2-7B-Chat-GGUF \
    llama-2-7b-chat.Q4_K_M.gguf \
    --local-dir models/
 # Or convert from HuggingFace
 python convert_hf_to_gguf.py models/llama-2-7b-chat/
 ```
 ### Run inference
 ```bash
 # Simple chat
 ./llama-cli \
    -m models/llama-2-7b-chat.Q4_K_M.gguf \
    -p "Explain quantum computing" \
    -n 256  # Max tokens
 # Interactive chat
 ./llama-cli \
    -m models/llama-2-7b-chat.Q4_K_M.gguf \
    --interactive
 ```
 ### Server mode
 ```bash
 # Start OpenAI-compatible server
 ./llama-server \
    -m models/llama-2-7b-chat.Q4_K_M.gguf \
    --host 0.0.0.0 \
    --port 8080 \
    -ngl 32  # Offload 32 layers to GPU
 # Client request
 curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-2-7b-chat",
    "messages": [{"role": "user", "content": "Hello!"}],
    "temperature": 0.7,
    "max_tokens": 100
  }'
 ```
 ## Quantization formats
 ### GGUF format overview
 | Format | Bits | Size (7B) | Speed | Quality | Use Case |
 |--------|------|-----------|-------|---------|----------|
 | **Q4_K_M** | 4.5 | 4.1 GB | Fast | Good | **Recommended default** |
 | Q4_K_S | 4.3 | 3.9 GB | Faster | Lower | Speed critical |
 | Q5_K_M | 5.5 | 4.8 GB | Medium | Better | Quality critical |
 | Q6_K | 6.5 | 5.5 GB | Slower | Best | Maximum quality |
 | Q8_0 | 8.0 | 7.0 GB | Slow | Excellent | Minimal degradation |
 | Q2_K | 2.5 | 2.7 GB | Fastest | Poor | Testing only |
 ### Choosing quantization
 ```bash
 # General use (balanced)
 Q4_K_M  # 4-bit, medium quality
 # Maximum speed (more degradation)
 Q2_K or Q3_K_M
 # Maximum quality (slower)
 Q6_K or Q8_0
 # Very large models (70B, 405B)
 Q3_K_M or Q4_K_S  # Lower bits to fit in memory
 ```
 ## Hardware acceleration
 ### Apple Silicon (Metal)
 ```bash
 # Build with Metal
 make LLAMA_METAL=1
 # Run with GPU acceleration (automatic)
 ./llama-cli -m model.gguf -ngl 999  # Offload all layers
 # Performance: M3 Max 40-60 tokens/sec (Llama 2-7B Q4_K_M)
 ```
 ### NVIDIA GPUs (CUDA)
 ```bash
 # Build with CUDA
 make LLAMA_CUDA=1
 # Offload layers to GPU
 ./llama-cli -m model.gguf -ngl 35  # Offload 35/40 layers
 # Hybrid CPU+GPU for large models
 ./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20  # GPU: 20 layers, CPU: rest
 ```
 ### AMD GPUs (ROCm)
 ```bash
 # Build with ROCm
 make LLAMA_HIP=1
 # Run with AMD GPU
 ./llama-cli -m model.gguf -ngl 999
 ```
 ## Common patterns
 ### Batch processing
 ```bash
 # Process multiple prompts from file
 cat prompts.txt | ./llama-cli \
    -m model.gguf \
    --batch-size 512 \
    -n 100
 ```
 ### Constrained generation
 ```bash
 # JSON output with grammar
 ./llama-cli \
    -m model.gguf \
    -p "Generate a person: " \
    --grammar-file grammars/json.gbnf
 # Outputs valid JSON only
 ```
 ### Context size
 ```bash
 # Increase context (default 512)
 ./llama-cli \
    -m model.gguf \
    -c 4096  # 4K context window
 # Very long context (if model supports)
 ./llama-cli -m model.gguf -c 32768  # 32K context
 ```
 ## Performance benchmarks
 ### CPU performance (Llama 2-7B Q4_K_M)
 | CPU | Threads | Speed | Cost |
 |-----|---------|-------|------|
 | Apple M3 Max | 16 | 50 tok/s | $0 (local) |
 | AMD Ryzen 9 7950X | 32 | 35 tok/s | $0.50/hour |
 | Intel i9-13900K | 32 | 30 tok/s | $0.40/hour |
 | AWS c7i.16xlarge | 64 | 40 tok/s | $2.88/hour |
 ### GPU acceleration (Llama 2-7B Q4_K_M)
 | GPU | Speed | vs CPU | Cost |
 |-----|-------|--------|------|
 | NVIDIA RTX 4090 | 120 tok/s | 3-4× | $0 (local) |
 | NVIDIA A10 | 80 tok/s | 2-3× | $1.00/hour |
 | AMD MI250 | 70 tok/s | 2× | $2.00/hour |
 | Apple M3 Max (Metal) | 50 tok/s | ~Same | $0 (local) |
 ## Supported models
 **LLaMA family**:
 - Llama 2 (7B, 13B, 70B)
 - Llama 3 (8B, 70B, 405B)
 - Code Llama
 **Mistral family**:
 - Mistral 7B
 - Mixtral 8x7B, 8x22B
 **Other**:
 - Falcon, BLOOM, GPT-J
 - Phi-3, Gemma, Qwen
 - LLaVA (vision), Whisper (audio)
 **Find models**: https://huggingface.co/models?library=gguf
 ## References
 - **[Quantization Guide](references/quantization.md)** - GGUF formats, conversion, quality comparison
 - **[Server Deployment](references/server.md)** - API endpoints, Docker, monitoring
 - **[Optimization](references/optimization.md)** - Performance tuning, hybrid CPU+GPU
 ## Resources
 - **GitHub**: https://github.com/ggerganov/llama.cpp
 - **Models**: https://huggingface.co/models?library=gguf
 - **Discord**: https://discord.gg/llama-cpp
--- a/skills/mlops/llama-cpp/references/optimization.md
+++ b/skills/mlops/llama-cpp/references/optimization.md
@ -0,0 +1,89 @@
 # Performance Optimization Guide
 Maximize llama.cpp inference speed and efficiency.
 ## CPU Optimization
 ### Thread tuning
 ```bash
 # Set threads (default: physical cores)
 ./llama-cli -m model.gguf -t 8
 # For AMD Ryzen 9 7950X (16 cores, 32 threads)
 -t 16  # Best: physical cores
 # Avoid hyperthreading (slower for matrix ops)
 ```
 ### BLAS acceleration
 ```bash
 # OpenBLAS (faster matrix ops)
 make LLAMA_OPENBLAS=1
 # BLAS gives 2-3× speedup
 ```
 ## GPU Offloading
 ### Layer offloading
 ```bash
 # Offload 35 layers to GPU (hybrid mode)
 ./llama-cli -m model.gguf -ngl 35
 # Offload all layers
 ./llama-cli -m model.gguf -ngl 999
 # Find optimal value:
 # Start with -ngl 999
 # If OOM, reduce by 5 until fits
 ```
 ### Memory usage
 ```bash
 # Check VRAM usage
 nvidia-smi dmon
 # Reduce context if needed
 ./llama-cli -m model.gguf -c 2048  # 2K context instead of 4K
 ```
 ## Batch Processing
 ```bash
 # Increase batch size for throughput
 ./llama-cli -m model.gguf -b 512  # Default: 512
 # Physical batch (GPU)
 -ub 128  # --ubatch-size: process 128 tokens at once
 ```
 ## Context Management
 ```bash
 # Default context (512 tokens)
 -c 512
 # Longer context (slower, more memory)
 -c 4096
 # Very long context (if model supports)
 -c 32768
 ```
 ## Benchmarks
 ### CPU Performance (Llama 2-7B Q4_K_M)
 | Setup | Speed | Notes |
 |-------|-------|-------|
 | Apple M3 Max | 50 tok/s | Metal acceleration |
 | AMD 7950X (16c) | 35 tok/s | OpenBLAS |
 | Intel i9-13900K | 30 tok/s | AVX2 |
 ### GPU Offloading (RTX 4090)
 | Layers GPU | Speed | VRAM |
 |------------|-------|------|
 | 0 (CPU only) | 30 tok/s | 0 GB |
 | 20 (hybrid) | 80 tok/s | 8 GB |
 | 35 (all) | 120 tok/s | 12 GB |
--- a/skills/mlops/llama-cpp/references/quantization.md
+++ b/skills/mlops/llama-cpp/references/quantization.md
@ -0,0 +1,213 @@
 # GGUF Quantization Guide
 Complete guide to GGUF quantization formats and model conversion.
 ## Quantization Overview
 **GGUF** (GPT-Generated Unified Format) - Standard format for llama.cpp models.
 ### Format Comparison
 | Format | Perplexity | Size (7B) | Tokens/sec | Notes |
 |--------|------------|-----------|------------|-------|
 | FP16 | 5.9565 (baseline) | 13.0 GB | 15 tok/s | Original quality |
 | Q8_0 | 5.9584 (+0.03%) | 7.0 GB | 25 tok/s | Nearly lossless |
 | **Q6_K** | 5.9642 (+0.13%) | 5.5 GB | 30 tok/s | Best quality/size |
 | **Q5_K_M** | 5.9796 (+0.39%) | 4.8 GB | 35 tok/s | Balanced |
 | **Q4_K_M** | 6.0565 (+1.68%) | 4.1 GB | 40 tok/s | **Recommended** |
 | Q4_K_S | 6.1125 (+2.62%) | 3.9 GB | 42 tok/s | Faster, lower quality |
 | Q3_K_M | 6.3184 (+6.07%) | 3.3 GB | 45 tok/s | Small models only |
 | Q2_K | 6.8673 (+15.3%) | 2.7 GB | 50 tok/s | Not recommended |
 **Recommendation**: Use **Q4_K_M** for best balance of quality and speed.
 ## Converting Models
 ### HuggingFace to GGUF
 ```bash
 # 1. Download HuggingFace model
 huggingface-cli download meta-llama/Llama-2-7b-chat-hf \
    --local-dir models/llama-2-7b-chat/
 # 2. Convert to FP16 GGUF
 python convert_hf_to_gguf.py \
    models/llama-2-7b-chat/ \
    --outtype f16 \
    --outfile models/llama-2-7b-chat-f16.gguf
 # 3. Quantize to Q4_K_M
 ./llama-quantize \
    models/llama-2-7b-chat-f16.gguf \
    models/llama-2-7b-chat-Q4_K_M.gguf \
    Q4_K_M
 ```
 ### Batch quantization
 ```bash
 # Quantize to multiple formats
 for quant in Q4_K_M Q5_K_M Q6_K Q8_0; do
    ./llama-quantize \
        model-f16.gguf \
        model-${quant}.gguf \
        $quant
 done
 ```
 ## K-Quantization Methods
 **K-quants** use mixed precision for better quality:
 - Attention weights: Higher precision
 - Feed-forward weights: Lower precision
 **Variants**:
 - `_S` (Small): Faster, lower quality
 - `_M` (Medium): Balanced (recommended)
 - `_L` (Large): Better quality, larger size
 **Example**: `Q4_K_M`
 - `Q4`: 4-bit quantization
 - `K`: Mixed precision method
 - `M`: Medium quality
 ## Quality Testing
 ```bash
 # Calculate perplexity (quality metric)
 ./llama-perplexity \
    -m model.gguf \
    -f wikitext-2-raw/wiki.test.raw \
    -c 512
 # Lower perplexity = better quality
 # Baseline (FP16): ~5.96
 # Q4_K_M: ~6.06 (+1.7%)
 # Q2_K: ~6.87 (+15.3% - too much degradation)
 ```
 ## Use Case Guide
 ### General purpose (chatbots, assistants)
 ```
 Q4_K_M - Best balance
 Q5_K_M - If you have extra RAM
 ```
 ### Code generation
 ```
 Q5_K_M or Q6_K - Higher precision helps with code
 ```
 ### Creative writing
 ```
 Q4_K_M - Sufficient quality
 Q3_K_M - Acceptable for draft generation
 ```
 ### Technical/medical
 ```
 Q6_K or Q8_0 - Maximum accuracy
 ```
 ### Edge devices (Raspberry Pi)
 ```
 Q2_K or Q3_K_S - Fit in limited RAM
 ```
 ## Model Size Scaling
 ### 7B parameter models
 | Format | Size | RAM needed |
 |--------|------|------------|
 | Q2_K | 2.7 GB | 5 GB |
 | Q3_K_M | 3.3 GB | 6 GB |
 | Q4_K_M | 4.1 GB | 7 GB |
 | Q5_K_M | 4.8 GB | 8 GB |
 | Q6_K | 5.5 GB | 9 GB |
 | Q8_0 | 7.0 GB | 11 GB |
 ### 13B parameter models
 | Format | Size | RAM needed |
 |--------|------|------------|
 | Q2_K | 5.1 GB | 8 GB |
 | Q3_K_M | 6.2 GB | 10 GB |
 | Q4_K_M | 7.9 GB | 12 GB |
 | Q5_K_M | 9.2 GB | 14 GB |
 | Q6_K | 10.7 GB | 16 GB |
 ### 70B parameter models
 | Format | Size | RAM needed |
 |--------|------|------------|
 | Q2_K | 26 GB | 32 GB |
 | Q3_K_M | 32 GB | 40 GB |
 | Q4_K_M | 41 GB | 48 GB |
 | Q4_K_S | 39 GB | 46 GB |
 | Q5_K_M | 48 GB | 56 GB |
 **Recommendation for 70B**: Use Q3_K_M or Q4_K_S to fit in consumer hardware.
 ## Finding Pre-Quantized Models
 **TheBloke** on HuggingFace:
 - https://huggingface.co/TheBloke
 - Most models available in all GGUF formats
 - No conversion needed
 **Example**:
 ```bash
 # Download pre-quantized Llama 2-7B
 huggingface-cli download \
    TheBloke/Llama-2-7B-Chat-GGUF \
    llama-2-7b-chat.Q4_K_M.gguf \
    --local-dir models/
 ```
 ## Importance Matrices (imatrix)
 **What**: Calibration data to improve quantization quality.
 **Benefits**:
 - 10-20% perplexity improvement with Q4
 - Essential for Q3 and below
 **Usage**:
 ```bash
 # 1. Generate importance matrix
 ./llama-imatrix \
    -m model-f16.gguf \
    -f calibration-data.txt \
    -o model.imatrix
 # 2. Quantize with imatrix
 ./llama-quantize \
    --imatrix model.imatrix \
    model-f16.gguf \
    model-Q4_K_M.gguf \
    Q4_K_M
 ```
 **Calibration data**:
 - Use domain-specific text (e.g., code for code models)
 - ~100MB of representative text
 - Higher quality data = better quantization
 ## Troubleshooting
 **Model outputs gibberish**:
 - Quantization too aggressive (Q2_K)
 - Try Q4_K_M or Q5_K_M
 - Verify model converted correctly
 **Out of memory**:
 - Use lower quantization (Q4_K_S instead of Q5_K_M)
 - Offload fewer layers to GPU (`-ngl`)
 - Use smaller context (`-c 2048`)
 **Slow inference**:
 - Higher-bit formats (more bits per weight) are larger and generate fewer tokens per second
 - Q8_0 much slower than Q4_K_M
 - Consider speed vs quality trade-off
--- a/skills/mlops/llama-cpp/references/server.md
+++ b/skills/mlops/llama-cpp/references/server.md
@ -0,0 +1,125 @@
 # Server Deployment Guide
 Production deployment of llama.cpp server with OpenAI-compatible API.
 ## Server Modes
 ### llama-server
 ```bash
 # Basic server
 ./llama-server \
    -m models/llama-2-7b-chat.Q4_K_M.gguf \
    --host 0.0.0.0 \
    --port 8080 \
    -c 4096  # Context size
 # With GPU acceleration
 ./llama-server \
    -m models/llama-2-70b.Q4_K_M.gguf \
    -ngl 40  # Offload 40 layers to GPU
 ```
 ## OpenAI-Compatible API
 ### Chat completions
 ```bash
 curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-2",
    "messages": [
      {"role": "system", "content": "You are helpful"},
      {"role": "user", "content": "Hello"}
    ],
    "temperature": 0.7,
    "max_tokens": 100
  }'
 ```
 ### Streaming
 ```bash
 curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-2",
    "messages": [{"role": "user", "content": "Count to 10"}],
    "stream": true
  }'
 ```
 ## Docker Deployment
 **Dockerfile**:
 ```dockerfile
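 # CUDA devel image provides nvcc, which the LLAMA_CUDA=1 build below requires
 FROM nvidia/cuda:12.2.0-devel-ubuntu22.04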
 RUN apt-get update && apt-get install -y git build-essential
 RUN git clone https://github.com/ggerganov/llama.cpp
 WORKDIR /llama.cpp
 RUN make LLAMA_CUDA=1
 COPY models/ /models/
 EXPOSE 8080
 CMD ["./llama-server", "-m", "/models/model.gguf", "--host", "0.0.0.0", "--port", "8080"]
 ```
 **Run**:
 ```bash
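 # Build the image first (run from the directory containing the Dockerfile)
 docker build -t llama-cpp:latest .
 docker run --gpus all -p 8080:8080 llama-cpp:latest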
 ```
 ## Monitoring
 ```bash
 # Server metrics endpoint (Prometheus format; start llama-server with --metrics to enable it)
 curl http://localhost:8080/metrics
 # Health check
 curl http://localhost:8080/health
 ```
 **Metrics**:
 - requests_total
 - tokens_generated
 - prompt_tokens
 - completion_tokens
 - kv_cache_tokens
 ## Load Balancing
 **NGINX**:
 ```nginx
 upstream llama_cpp {
    server llama1:8080;
    server llama2:8080;
 }
 server {
    location / {
        proxy_pass http://llama_cpp;
        proxy_read_timeout 300s;
    }
 }
 ```
 ## Performance Tuning
 **Parallel requests**:
 ```bash
 ./llama-server \
    -m model.gguf \
    -np 4  # 4 parallel slots
 ```
 **Continuous batching**:
 ```bash
 ./llama-server \
    -m model.gguf \
    --cont-batching  # Enable continuous batching
 ```
 **Context caching**:
 ```bash
 ./llama-server \
    -m model.gguf \
    --cache-prompt  # Cache processed prompts
 ```
--- a/skills/mlops/llava/SKILL.md
+++ b/skills/mlops/llava/SKILL.md
@ -0,0 +1,304 @@
 ---
 name: llava
 description: Large Language and Vision Assistant. Enables visual instruction tuning and image-based conversations. Combines CLIP vision encoder with Vicuna/LLaMA language models. Supports multi-turn image chat, visual question answering, and instruction following. Use for vision-language chatbots or image understanding tasks. Best for conversational image analysis.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [LLaVA, Vision-Language, Multimodal, Visual Question Answering, Image Chat, CLIP, Vicuna, Conversational AI, Instruction Tuning, VQA]
 dependencies: [transformers, torch, pillow]
 ---
 # LLaVA - Large Language and Vision Assistant
 Open-source vision-language model for conversational image understanding.
 ## When to use LLaVA
 **Use when:**
 - Building vision-language chatbots
 - Visual question answering (VQA)
 - Image description and captioning
 - Multi-turn image conversations
 - Visual instruction following
 - Document understanding with images
 **Metrics**:
 - **23,000+ GitHub stars**
 - GPT-4V level capabilities (targeted)
 - Apache 2.0 License
 - Multiple model sizes (7B-34B params)
 **Use alternatives instead**:
 - **GPT-4V**: Highest quality, API-based
 - **CLIP**: Simple zero-shot classification
 - **BLIP-2**: Better for captioning only
 - **Flamingo**: Research, not open-source
 ## Quick start
 ### Installation
 ```bash
 # Clone repository
 git clone https://github.com/haotian-liu/LLaVA
 cd LLaVA
 # Install
 pip install -e .
 ```
 ### Basic usage
 ```python
 from llava.model.builder import load_pretrained_model
 from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
 from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
 from llava.conversation import conv_templates
 from PIL import Image
 import torch
 # Load model
 model_path = "liuhaotian/llava-v1.5-7b"
 tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=get_model_name_from_path(model_path)
 )
 # Load image
 image = Image.open("image.jpg")
 image_tensor = process_images([image], image_processor, model.config)
 image_tensor = image_tensor.to(model.device, dtype=torch.float16)
 # Create conversation
 conv = conv_templates["llava_v1"].copy()
 conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\nWhat is in this image?")
 conv.append_message(conv.roles[1], None)
 prompt = conv.get_prompt()
 # Generate response
 input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
 with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,
        do_sample=True,
        temperature=0.2,
        max_new_tokens=512
    )
 response = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
 print(response)
 ```
 ## Available models
 | Model | Parameters | VRAM | Quality |
 |-------|------------|------|---------|
 | LLaVA-v1.5-7B | 7B | ~14 GB | Good |
 | LLaVA-v1.5-13B | 13B | ~28 GB | Better |
 | LLaVA-v1.6-34B | 34B | ~70 GB | Best |
 ```python
 # Load different models
 model_7b = "liuhaotian/llava-v1.5-7b"
 model_13b = "liuhaotian/llava-v1.5-13b"
 model_34b = "liuhaotian/llava-v1.6-34b"
 # 4-bit quantization for lower VRAM
 load_4bit = True  # Reduces VRAM by ~4×
 ```
 ## CLI usage
 ```bash
 # Single image query
 python -m llava.eval.run_llava \
    --model-path liuhaotian/llava-v1.5-7b \
    --image-file image.jpg \
    --query "What is in this image?"
 # Multi-turn conversation
 python -m llava.serve.cli \
    --model-path liuhaotian/llava-v1.5-7b \
    --image-file image.jpg
 # Then type questions interactively
 ```
 ## Web UI (Gradio)
 ```bash
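 # The web UI needs three processes: a controller, a model worker, and the Gradio server
 # 1. Start the controller
 python -m llava.serve.controller --host 0.0.0.0 --port 10000
 # 2. Start a model worker (loads the model)
 python -m llava.serve.model_worker \
    --host 0.0.0.0 \
    --controller http://localhost:10000 \
    --port 40000 \
    --worker http://localhost:40000 \
    --model-path liuhaotian/llava-v1.5-7b \
    --load-4bit  # Optional: reduce VRAM
 # 3. Launch Gradio interface
 python -m llava.serve.gradio_web_server \
    --controller http://localhost:10000 \
    --model-list-mode reload
 # Access at http://localhost:7860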
 ```
 ## Multi-turn conversations
 ```python
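 # NOTE: generate(conv, model, image) is a helper you write yourself; it wraps the
 # prompt-building, tokenization, and model.generate() steps shown in "Basic usage"
 # Initialize conversation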
 conv = conv_templates["llava_v1"].copy()
 # Turn 1
 conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\nWhat is in this image?")
 conv.append_message(conv.roles[1], None)
 response1 = generate(conv, model, image)  # "A dog playing in a park"
 # Turn 2
 conv.messages[-1][1] = response1  # Add previous response
 conv.append_message(conv.roles[0], "What breed is the dog?")
 conv.append_message(conv.roles[1], None)
 response2 = generate(conv, model, image)  # "Golden Retriever"
 # Turn 3
 conv.messages[-1][1] = response2
 conv.append_message(conv.roles[0], "What time of day is it?")
 conv.append_message(conv.roles[1], None)
 response3 = generate(conv, model, image)
 ```
 ## Common tasks
 ### Image captioning
 ```python
 question = "Describe this image in detail."
 response = ask(model, image, question)
 ```
 ### Visual question answering
 ```python
 question = "How many people are in the image?"
 response = ask(model, image, question)
 ```
 ### Object detection (textual)
 ```python
 question = "List all the objects you can see in this image."
 response = ask(model, image, question)
 ```
 ### Scene understanding
 ```python
 question = "What is happening in this scene?"
 response = ask(model, image, question)
 ```
 ### Document understanding
 ```python
 question = "What is the main topic of this document?"
 response = ask(model, document_image, question)
 ```
 ## Training custom model
 ```bash
 # Stage 1: Feature alignment (558K image-caption pairs)
 bash scripts/v1_5/pretrain.sh
 # Stage 2: Visual instruction tuning (150K instruction data)
 bash scripts/v1_5/finetune.sh
 ```
 ## Quantization (reduce VRAM)
 ```python
 # 4-bit quantization
 tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path="liuhaotian/llava-v1.5-13b",
    model_base=None,
    model_name=get_model_name_from_path("liuhaotian/llava-v1.5-13b"),
    load_4bit=True  # Reduces VRAM ~4×
 )
 # 8-bit quantization
 load_8bit=True  # Reduces VRAM ~2×
 ```
 ## Best practices
 1. **Start with 7B model** - Good quality, manageable VRAM
 2. **Use 4-bit quantization** - Reduces VRAM significantly
 3. **GPU required** - CPU inference extremely slow
 4. **Clear prompts** - Specific questions get better answers
 5. **Multi-turn conversations** - Maintain conversation context
 6. **Temperature 0.2-0.7** - Balance creativity/consistency
 7. **max_new_tokens 512-1024** - For detailed responses
 8. **Batch processing** - Process multiple images sequentially
 ## Performance
 | Model | VRAM (FP16) | VRAM (4-bit) | Speed (tokens/s) |
 |-------|-------------|--------------|------------------|
 | 7B | ~14 GB | ~4 GB | ~20 |
 | 13B | ~28 GB | ~8 GB | ~12 |
 | 34B | ~70 GB | ~18 GB | ~5 |
 *On A100 GPU*
 ## Benchmarks
 LLaVA achieves competitive scores on:
 - **VQAv2**: 78.5%
 - **GQA**: 62.0%
 - **MM-Vet**: 35.4%
 - **MMBench**: 64.3%
 ## Limitations
 1. **Hallucinations** - May describe things not in image
 2. **Spatial reasoning** - Struggles with precise locations
 3. **Small text** - Difficulty reading fine print
 4. **Object counting** - Imprecise for many objects
 5. **VRAM requirements** - Need powerful GPU
 6. **Inference speed** - Slower than CLIP
 ## Integration with frameworks
 ### LangChain
 ```python
 from langchain.llms.base import LLM
 class LLaVALLM(LLM):
    def _call(self, prompt, stop=None):
        # Custom LLaVA inference
        return response
 llm = LLaVALLM()
 ```
 ### Gradio App
 ```python
 import gradio as gr
 def chat(message, history, image):
    # ChatInterface calls fn(message, history, *additional_inputs)
    response = ask_llava(model, image, message)
    return response
 demo = gr.ChatInterface(
    chat,
    additional_inputs=[gr.Image(type="pil")],
    title="LLaVA Chat"
 )
 demo.launch()
 ```
 ## Resources
 - **GitHub**: https://github.com/haotian-liu/LLaVA ⭐ 23,000+
 - **Paper**: https://arxiv.org/abs/2304.08485
 - **Demo**: https://llava.hliu.cc
 - **Models**: https://huggingface.co/liuhaotian
 - **License**: Apache 2.0
--- a/skills/mlops/llava/references/training.md
+++ b/skills/mlops/llava/references/training.md
@ -0,0 +1,197 @@
 # LLaVA Training Guide
 Guide to training and fine-tuning LLaVA models.
 ## Training stages
 ### Stage 1: Feature alignment (Pretraining)
 **Purpose**: Align vision encoder with language model
 **Data**: 558K image-caption pairs (LAION-CC-SBU subset)
 ```bash
 # Download pretrained projector or train from scratch
 bash scripts/v1_5/pretrain.sh
 ```
 **Configuration:**
 - Base model: Vicuna-7B or LLaMA-2-7B
 - Vision encoder: CLIP ViT-L/14
 - Training time: ~20 hours on 8× A100
 ### Stage 2: Visual instruction tuning
 **Purpose**: Teach model to follow visual instructions
 **Data**: 150K GPT-generated multimodal instruction data
 ```bash
 # Fine-tune with instruction data
 bash scripts/v1_5/finetune.sh
 ```
 **Configuration:**
 - Epochs: 1
 - Batch size: 128 (across 8 GPUs)
 - Learning rate: 2e-5
 - Training time: ~24 hours on 8× A100
 ## Data format
 ### Instruction data format
 ```json
 [
    {
        "id": "001",
        "image": "path/to/image.jpg",
        "conversations": [
            {
                "from": "human",
                "value": "<image>\nWhat is in this image?"
            },
            {
                "from": "gpt",
                "value": "The image shows a dog playing in a park."
            },
            {
                "from": "human",
                "value": "What breed is the dog?"
            },
            {
                "from": "gpt",
                "value": "It appears to be a Golden Retriever."
            }
        ]
    }
 ]
 ```
 ## Fine-tuning on custom data
 ### Prepare your data
 ```python
 import json
 # Create instruction data
 data = []
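 for image_path, qa_pairs in your_dataset:
    conversations = []
    for turn, (q, a) in enumerate(qa_pairs):
        # Only the first human turn carries the <image> placeholder (see format above)
        prompt = f"<image>\n{q}" if turn == 0 else q
        conversations.append({"from": "human", "value": prompt})
        conversations.append({"from": "gpt", "value": a})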
    data.append({
        "id": str(len(data)),
        "image": image_path,
        "conversations": conversations
    })
 # Save
 with open("custom_data.json", "w") as f:
    json.dump(data, f, indent=2)
 ```
 ### Fine-tune script
 ```bash
 #!/bin/bash
 # Set paths
 DATA_PATH="custom_data.json"
 IMAGE_FOLDER="path/to/images"
 MODEL_PATH="liuhaotian/llava-v1.5-7b"
 OUTPUT_DIR="./checkpoints/llava-custom"
 # Fine-tune
 deepspeed llava/train/train_mem.py \
    --deepspeed ./scripts/zero2.json \
    --model_name_or_path $MODEL_PATH \
    --version v1 \
    --data_path $DATA_PATH \
    --image_folder $IMAGE_FOLDER \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir $OUTPUT_DIR \
    --num_train_epochs 1 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb
 ```
 ## LoRA fine-tuning (memory efficient)
 ```python
 from peft import LoraConfig, get_peft_model
 # LoRA config
 lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
 )
 # Apply LoRA
 model = get_peft_model(base_model, lora_config)
 # Train with much lower memory
 ```
 ## Hardware requirements
 ### Full fine-tuning
 - **7B model**: 8× A100 (40GB)
 - **13B model**: 8× A100 (80GB)
 - **Training time**: 20-48 hours
 ### LoRA fine-tuning
 - **7B model**: 1× A100 (40GB)
 - **13B model**: 2× A100 (40GB)
 - **Training time**: 10-24 hours
 ## Best practices
 1. **Start with pretrained** - Don't train from scratch
 2. **Use LoRA for efficiency** - 10× less memory
 3. **Quality over quantity** - 1K high-quality > 10K low-quality
 4. **Multi-turn conversations** - More engaging than single Q&A
 5. **Diverse images** - Cover different scenarios
 6. **Clear instructions** - Specific questions get better answers
 7. **Monitor loss** - Should decrease smoothly
 8. **Save checkpoints** - Training can fail
 9. **Test regularly** - Validate on held-out set
 10. **Use DeepSpeed** - For multi-GPU training
 ## Resources
 - **Training script**: https://github.com/haotian-liu/LLaVA/tree/main/scripts
 - **Data format**: https://github.com/haotian-liu/LLaVA/blob/main/docs/Data.md
 - **Paper**: https://arxiv.org/abs/2304.08485
--- a/skills/mlops/lm-evaluation-harness/SKILL.md
+++ b/skills/mlops/lm-evaluation-harness/SKILL.md
@ -0,0 +1,490 @@
 ---
 name: evaluating-llms-harness
 description: Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. Supports HuggingFace, vLLM, APIs.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Evaluation, LM Evaluation Harness, Benchmarking, MMLU, HumanEval, GSM8K, EleutherAI, Model Quality, Academic Benchmarks, Industry Standard]
 dependencies: [lm-eval, transformers, vllm]
 ---
 # lm-evaluation-harness - LLM Benchmarking
 ## Quick start
 lm-evaluation-harness evaluates LLMs across 60+ academic benchmarks using standardized prompts and metrics.
 **Installation**:
 ```bash
 pip install lm-eval
 ```
 **Evaluate any HuggingFace model**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu,gsm8k,hellaswag \
  --device cuda:0 \
  --batch_size 8
 ```
 **View available tasks**:
 ```bash
 lm_eval --tasks list
 ```
 ## Common workflows
 ### Workflow 1: Standard benchmark evaluation
 Evaluate model on core benchmarks (MMLU, GSM8K, HumanEval).
 Copy this checklist:
 ```
 Benchmark Evaluation:
 - [ ] Step 1: Choose benchmark suite
 - [ ] Step 2: Configure model
 - [ ] Step 3: Run evaluation
 - [ ] Step 4: Analyze results
 ```
 **Step 1: Choose benchmark suite**
 **Core reasoning benchmarks**:
 - **MMLU** (Massive Multitask Language Understanding) - 57 subjects, multiple choice
 - **GSM8K** - Grade school math word problems
 - **HellaSwag** - Common sense reasoning
 - **TruthfulQA** - Truthfulness and factuality
 - **ARC** (AI2 Reasoning Challenge) - Science questions
 **Code benchmarks**:
 - **HumanEval** - Python code generation (164 problems)
 - **MBPP** (Mostly Basic Python Problems) - Python coding
 **Standard suite** (recommended for model releases):
 ```bash
 --tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge
 ```
 **Step 2: Configure model**
 **HuggingFace model**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf,dtype=bfloat16 \
  --tasks mmlu \
  --device cuda:0 \
  --batch_size auto  # Auto-detect optimal batch size
 ```
 **Quantized model (4-bit/8-bit)**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf,load_in_4bit=True \
  --tasks mmlu \
  --device cuda:0
 ```
 **Custom checkpoint**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=/path/to/my-model,tokenizer=/path/to/tokenizer \
  --tasks mmlu \
  --device cuda:0
 ```
 **Step 3: Run evaluation**
 ```bash
 # Full MMLU evaluation (57 subjects), 5-shot (standard)
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu \
  --num_fewshot 5 \
  --batch_size 8 \
  --output_path results/ \
  --log_samples  # Save individual predictions
 # Multiple benchmarks at once
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge \
  --num_fewshot 5 \
  --batch_size 8 \
  --output_path results/llama2-7b-eval.json
 ```
 **Step 4: Analyze results**
 Results saved to `results/llama2-7b-eval.json`:
 ```json
 {
  "results": {
    "mmlu": {
      "acc": 0.459,
      "acc_stderr": 0.004
    },
    "gsm8k": {
      "exact_match": 0.142,
      "exact_match_stderr": 0.006
    },
    "hellaswag": {
      "acc_norm": 0.765,
      "acc_norm_stderr": 0.004
    }
  },
  "config": {
    "model": "hf",
    "model_args": "pretrained=meta-llama/Llama-2-7b-hf",
    "num_fewshot": 5
  }
 }
 ```
 ### Workflow 2: Track training progress
 Evaluate checkpoints during training.
 ```
 Training Progress Tracking:
 - [ ] Step 1: Set up periodic evaluation
 - [ ] Step 2: Choose quick benchmarks
 - [ ] Step 3: Automate evaluation
 - [ ] Step 4: Plot learning curves
 ```
 **Step 1: Set up periodic evaluation**
 Evaluate every N training steps:
 ```bash
 #!/bin/bash
 # eval_checkpoint.sh
 CHECKPOINT_DIR=$1
 STEP=$2
 # 0-shot for speed
 lm_eval --model hf \
  --model_args pretrained=$CHECKPOINT_DIR/checkpoint-$STEP \
  --tasks gsm8k,hellaswag \
  --num_fewshot 0 \
  --batch_size 16 \
  --output_path results/step-$STEP.json
 ```
 **Step 2: Choose quick benchmarks**
 Fast benchmarks for frequent evaluation:
 - **HellaSwag**: ~10 minutes on 1 GPU
 - **GSM8K**: ~5 minutes
 - **PIQA**: ~2 minutes
 Avoid for frequent eval (too slow):
 - **MMLU**: ~2 hours (57 subjects)
 - **HumanEval**: Requires code execution
 **Step 3: Automate evaluation**
 Integrate with training script:
 ```python
 # In training loop
 if step % eval_interval == 0:
    model.save_pretrained(f"checkpoints/checkpoint-{step}")
    # Run evaluation (script arguments: <checkpoint dir> <step number>)
    os.system(f"./eval_checkpoint.sh checkpoints {step}")
 ```
 Or use PyTorch Lightning callbacks:
 ```python
 from pytorch_lightning import Callback
 class EvalHarnessCallback(Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        step = trainer.global_step
        checkpoint_path = f"checkpoints/step-{step}"
        # Save checkpoint
        trainer.save_checkpoint(checkpoint_path)
        # Run lm-eval
        os.system(f"lm_eval --model hf --model_args pretrained={checkpoint_path} ...")
 ```
 **Step 4: Plot learning curves**
 ```python
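 import glob
 import json
 import matplotlib.pyplot as plt
 # Load all results (eval_checkpoint.sh writes one results/step-*.json per checkpoint)
 def step_of(path):
    return int(path.rsplit("-", 1)[1].split(".")[0])
 steps = []
 hellaswag_scores = []
 # Sort numerically so that step 1000 comes after step 200
 for file in sorted(glob.glob("results/step-*.json"), key=step_of):
    with open(file) as f:
        data = json.load(f)
    steps.append(step_of(file))
    # The periodic eval runs gsm8k and hellaswag (not mmlu)
    hellaswag_scores.append(data["results"]["hellaswag"]["acc_norm"])
 # Plot
 plt.plot(steps, hellaswag_scores)
 plt.xlabel("Training Step")
 plt.ylabel("HellaSwag Accuracy (normalized)")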
 plt.title("Training Progress")
 plt.savefig("training_curve.png")
 ```
 ### Workflow 3: Compare multiple models
 Benchmark suite for model comparison.
 ```
 Model Comparison:
 - [ ] Step 1: Define model list
 - [ ] Step 2: Run evaluations
 - [ ] Step 3: Generate comparison table
 ```
 **Step 1: Define model list**
 ```bash
 # models.txt
 meta-llama/Llama-2-7b-hf
 meta-llama/Llama-2-13b-hf
 mistralai/Mistral-7B-v0.1
 microsoft/phi-2
 ```
 **Step 2: Run evaluations**
 ```bash
 #!/bin/bash
 # eval_all_models.sh
 TASKS="mmlu,gsm8k,hellaswag,truthfulqa"
 while read model; do
    echo "Evaluating $model"
    # Extract model name for output file
    model_name=$(echo $model | sed 's/\//-/g')
    lm_eval --model hf \
      --model_args pretrained=$model,dtype=bfloat16 \
      --tasks $TASKS \
      --num_fewshot 5 \
      --batch_size auto \
      --output_path results/$model_name.json
 done < models.txt
 ```
 **Step 3: Generate comparison table**
 ```python
 import json
 import pandas as pd
 # HuggingFace model IDs, as listed in models.txt
 models = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-13b-hf",
    "mistralai/Mistral-7B-v0.1",
    "microsoft/phi-2"
 ]
 tasks = ["mmlu", "gsm8k", "hellaswag", "truthfulqa"]
 results = []
 for model in models:
    # eval_all_models.sh names each result file by replacing "/" with "-"
    file_name = model.replace("/", "-")
    with open(f"results/{file_name}.json") as f:
        data = json.load(f)
    row = {"Model": model}
    for task in tasks:
        # Get primary metric for each task (first one present)
        metrics = data["results"][task]
        for key in ("acc", "acc_norm", "exact_match"):
            if key in metrics:
                row[task.upper()] = f"{metrics[key]:.3f}"
                break
    results.append(row)
 df = pd.DataFrame(results)
 print(df.to_markdown(index=False))
 ```
 Output:
 ```
 | Model                  | MMLU  | GSM8K | HELLASWAG | TRUTHFULQA |
 |------------------------|-------|-------|-----------|------------|
 | meta-llama/Llama-2-7b  | 0.459 | 0.142 | 0.765     | 0.391      |
 | meta-llama/Llama-2-13b | 0.549 | 0.287 | 0.801     | 0.430      |
 | mistralai/Mistral-7B   | 0.626 | 0.395 | 0.812     | 0.428      |
 | microsoft/phi-2        | 0.560 | 0.613 | 0.682     | 0.447      |
 ```
 ### Workflow 4: Evaluate with vLLM (faster inference)
 Use vLLM backend for 5-10x faster evaluation.
 ```
 vLLM Evaluation:
 - [ ] Step 1: Install vLLM
 - [ ] Step 2: Configure vLLM backend
 - [ ] Step 3: Run evaluation
 ```
 **Step 1: Install vLLM**
 ```bash
 pip install vllm
 ```
 **Step 2: Configure vLLM backend**
 ```bash
 lm_eval --model vllm \
  --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8 \
  --tasks mmlu \
  --batch_size auto
 ```
 **Step 3: Run evaluation**
 vLLM is 5-10× faster than standard HuggingFace:
 ```bash
 # Standard HF: ~2 hours for MMLU on 7B model
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu \
  --batch_size 8
 # vLLM: ~15-20 minutes for MMLU on 7B model
 lm_eval --model vllm \
  --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=2 \
  --tasks mmlu \
  --batch_size auto
 ```
 ## When to use vs alternatives
 **Use lm-evaluation-harness when:**
 - Benchmarking models for academic papers
 - Comparing model quality across standard tasks
 - Tracking training progress
 - Reporting standardized metrics (everyone uses same prompts)
 - Need reproducible evaluation
 **Use alternatives instead:**
 - **HELM** (Stanford): Broader evaluation (fairness, efficiency, calibration)
 - **AlpacaEval**: Instruction-following evaluation with LLM judges
 - **MT-Bench**: Conversational multi-turn evaluation
 - **Custom scripts**: Domain-specific evaluation
 ## Common issues
 **Issue: Evaluation too slow**
 Use vLLM backend:
 ```bash
 lm_eval --model vllm \
  --model_args pretrained=model-name,tensor_parallel_size=2
 ```
 Or reduce fewshot examples:
 ```bash
 --num_fewshot 0  # Instead of 5
 ```
 Or evaluate subset of MMLU:
 ```bash
 --tasks mmlu_stem  # Only STEM subjects
 ```
 **Issue: Out of memory**
 Reduce batch size:
 ```bash
 --batch_size 1  # Or --batch_size auto
 ```
 Use quantization:
 ```bash
 --model_args pretrained=model-name,load_in_8bit=True
 ```
 Enable CPU offloading:
 ```bash
 --model_args pretrained=model-name,device_map=auto,offload_folder=offload
 ```
 **Issue: Different results than reported**
 Check fewshot count:
 ```bash
 --num_fewshot 5  # Most papers use 5-shot
 ```
 Check exact task name:
 ```bash
 --tasks mmlu  # Not mmlu_direct or mmlu_fewshot
 ```
 Verify model and tokenizer match:
 ```bash
 --model_args pretrained=model-name,tokenizer=same-model-name
 ```
 **Issue: HumanEval not executing code**
 Install execution dependencies:
 ```bash
 pip install human-eval
 ```
 Enable code execution:
 ```bash
 HF_ALLOW_CODE_EVAL=1 lm_eval --model hf \
  --model_args pretrained=model-name \
  --tasks humaneval \
  --confirm_run_unsafe_code  # Required: HumanEval executes model-generated code
 ```
 ## Advanced topics
 **Benchmark descriptions**: See [references/benchmark-guide.md](references/benchmark-guide.md) for detailed description of all 60+ tasks, what they measure, and interpretation.
 **Custom tasks**: See [references/custom-tasks.md](references/custom-tasks.md) for creating domain-specific evaluation tasks.
 **API evaluation**: See [references/api-evaluation.md](references/api-evaluation.md) for evaluating OpenAI, Anthropic, and other API models.
 **Multi-GPU strategies**: See [references/distributed-eval.md](references/distributed-eval.md) for data parallel and tensor parallel evaluation.
 ## Hardware requirements
 - **GPU**: NVIDIA (CUDA 11.8+), works on CPU (very slow)
 - **VRAM**:
  - 7B model: 16GB (bf16) or 8GB (8-bit)
  - 13B model: 28GB (bf16) or 14GB (8-bit)
  - 70B model: Requires multi-GPU or quantization
 - **Time** (7B model, single A100):
  - HellaSwag: 10 minutes
  - GSM8K: 5 minutes
  - MMLU (full): 2 hours
  - HumanEval: 20 minutes
 ## Resources
 - GitHub: https://github.com/EleutherAI/lm-evaluation-harness
 - Docs: https://github.com/EleutherAI/lm-evaluation-harness/tree/main/docs
 - Task library: 60+ tasks including MMLU, GSM8K, HumanEval, TruthfulQA, HellaSwag, ARC, WinoGrande, etc.
 - Leaderboard: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard (uses this harness)
--- a/skills/mlops/lm-evaluation-harness/references/api-evaluation.md
+++ b/skills/mlops/lm-evaluation-harness/references/api-evaluation.md
@ -0,0 +1,490 @@
 # API Evaluation
 Guide to evaluating OpenAI, Anthropic, and other API-based language models.
 ## Overview
 The lm-evaluation-harness supports evaluating API-based models through a unified `TemplateAPI` interface. This allows benchmarking of:
 - OpenAI models (GPT-4, GPT-3.5, etc.)
 - Anthropic models (Claude 3, Claude 2, etc.)
 - Local OpenAI-compatible APIs
 - Custom API endpoints
 **Why evaluate API models**:
 - Benchmark closed-source models
 - Compare API models to open models
 - Validate API performance
 - Track model updates over time
 ## Supported API Models
 | Provider | Model Type | Request Types | Logprobs |
 |----------|------------|---------------|----------|
 | OpenAI (completions) | `openai-completions` | All | ✅ Yes |
 | OpenAI (chat) | `openai-chat-completions` | `generate_until` only | ❌ No |
 | Anthropic (completions) | `anthropic-completions` | All | ❌ No |
 | Anthropic (chat) | `anthropic-chat` | `generate_until` only | ❌ No |
 | Local (OpenAI-compatible) | `local-completions` | Depends on server | Varies |
 **Note**: Models without logprobs can only be evaluated on generation tasks, not perplexity or loglikelihood tasks.
 ## OpenAI Models
 ### Setup
 ```bash
 export OPENAI_API_KEY=sk-...
 ```
 ### Completion Models (Legacy)
 **Available models**: `davinci-002`, `babbage-002`
 ```bash
 lm_eval --model openai-completions \
  --model_args model=davinci-002 \
  --tasks lambada_openai,hellaswag \
  --batch_size auto
 ```
 **Supports**:
 - `generate_until`: ✅
 - `loglikelihood`: ✅
 - `loglikelihood_rolling`: ✅
 ### Chat Models
 **Available models**: `gpt-4`, `gpt-4-turbo`, `gpt-3.5-turbo`
 ```bash
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo \
  --tasks mmlu,gsm8k,humaneval \
  --num_fewshot 5 \
  --batch_size auto
 ```
 **Supports**:
 - `generate_until`: ✅
 - `loglikelihood`: ❌ (no logprobs)
 - `loglikelihood_rolling`: ❌
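 **Important**: Chat models don't provide logprobs, so they can only be used with generation (`generate_until`) tasks such as GSM8K and HumanEval. Tasks scored by loglikelihood — perplexity tasks and standard multiple-choice tasks such as the default `mmlu` — require a generative variant of the task instead.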
 ### Configuration Options
 ```bash
 lm_eval --model openai-chat-completions \
  --model_args \
    model=gpt-4-turbo,\
    base_url=https://api.openai.com/v1,\
    num_concurrent=5,\
    max_retries=3,\
    timeout=60,\
    batch_size=auto
 ```
 **Parameters**:
 - `model`: Model identifier (required)
 - `base_url`: API endpoint (default: OpenAI)
 - `num_concurrent`: Concurrent requests (default: 5)
 - `max_retries`: Retry failed requests (default: 3)
 - `timeout`: Request timeout in seconds (default: 60)
 - `tokenizer`: Tokenizer to use (default: matches model)
 - `tokenizer_backend`: `"tiktoken"` or `"huggingface"`
 ### Cost Management
 OpenAI charges per token. Estimate costs before running:
 ```python
 # Rough estimate
 num_samples = 1000
 avg_tokens_per_sample = 500  # input + output
 cost_per_1k_tokens = 0.01  # GPT-3.5 Turbo
 total_cost = (num_samples * avg_tokens_per_sample / 1000) * cost_per_1k_tokens
 print(f"Estimated cost: ${total_cost:.2f}")
 ```
 **Cost-saving tips**:
 - Use `--limit N` for testing
 - Start with `gpt-3.5-turbo` before `gpt-4`
 - Set `max_gen_toks` to minimum needed
 - Use `num_fewshot=0` for zero-shot when possible
 ## Anthropic Models
 ### Setup
 ```bash
 export ANTHROPIC_API_KEY=sk-ant-...
 ```
 ### Completion Models (Legacy)
 ```bash
 lm_eval --model anthropic-completions \
  --model_args model=claude-2.1 \
  --tasks lambada_openai,hellaswag \
  --batch_size auto
 ```
 ### Chat Models (Recommended)
 **Available models**: `claude-3-5-sonnet-20241022`, `claude-3-opus-20240229`, `claude-3-sonnet-20240229`, `claude-3-haiku-20240307`
 ```bash
 lm_eval --model anthropic-chat \
  --model_args model=claude-3-5-sonnet-20241022 \
  --tasks mmlu,gsm8k,humaneval \
  --num_fewshot 5 \
  --batch_size auto
 ```
 **Aliases**: `anthropic-chat-completions` (same as `anthropic-chat`)
 ### Configuration Options
 ```bash
 lm_eval --model anthropic-chat \
  --model_args \
    model=claude-3-5-sonnet-20241022,\
    base_url=https://api.anthropic.com,\
    num_concurrent=5,\
    max_retries=3,\
    timeout=60
 ```
 ### Cost Management
 Anthropic pricing (as of 2024):
 - Claude 3.5 Sonnet: $3.00 / 1M input, $15.00 / 1M output
 - Claude 3 Opus: $15.00 / 1M input, $75.00 / 1M output
 - Claude 3 Haiku: $0.25 / 1M input, $1.25 / 1M output
 **Budget-friendly strategy**:
 ```bash
 # Test on small sample first
 lm_eval --model anthropic-chat \
  --model_args model=claude-3-haiku-20240307 \
  --tasks mmlu \
  --limit 100
 # Then run full eval on best model
 lm_eval --model anthropic-chat \
  --model_args model=claude-3-5-sonnet-20241022 \
  --tasks mmlu \
  --num_fewshot 5
 ```
 ## Local OpenAI-Compatible APIs
 Many local inference servers expose OpenAI-compatible APIs (vLLM, Text Generation Inference, llama.cpp, Ollama).
 ### vLLM Local Server
 **Start server**:
 ```bash
 vllm serve meta-llama/Llama-2-7b-hf \
  --host 0.0.0.0 \
  --port 8000
 ```
 **Evaluate**:
 ```bash
 lm_eval --model local-completions \
  --model_args \
    model=meta-llama/Llama-2-7b-hf,\
    base_url=http://localhost:8000/v1,\
    num_concurrent=1 \
  --tasks mmlu,gsm8k \
  --batch_size auto
 ```
 ### Text Generation Inference (TGI)
 **Start server**:
 ```bash
 docker run --gpus all --shm-size 1g -p 8080:80 \
  ghcr.io/huggingface/text-generation-inference:latest \
  --model-id meta-llama/Llama-2-7b-hf
 ```
 **Evaluate**:
 ```bash
 lm_eval --model local-completions \
  --model_args \
    model=meta-llama/Llama-2-7b-hf,\
    base_url=http://localhost:8080/v1 \
  --tasks hellaswag,arc_challenge
 ```
 ### Ollama
 **Start server**:
 ```bash
 ollama serve
 ollama pull llama2:7b
 ```
 **Evaluate**:
 ```bash
 lm_eval --model local-completions \
  --model_args \
    model=llama2:7b,\
    base_url=http://localhost:11434/v1 \
  --tasks mmlu
 ```
 ### llama.cpp Server
 **Start server**:
 ```bash
 ./server -m models/llama-2-7b.gguf --host 0.0.0.0 --port 8080
 ```
 **Evaluate**:
 ```bash
 lm_eval --model local-completions \
  --model_args \
    model=llama2,\
    base_url=http://localhost:8080/v1 \
  --tasks gsm8k
 ```
 ## Custom API Implementation
 For custom API endpoints, subclass `TemplateAPI`:
 ### Create `my_api.py`
 ```python
 from lm_eval.models.api_models import TemplateAPI
 import requests
 class MyCustomAPI(TemplateAPI):
    """Example TemplateAPI subclass for a custom HTTP endpoint.
    Keeps the API key on the instance and shows how to build a request
    payload and how to read generations and optional logprobs from a
    JSON response.
    NOTE(review): confirm these method names and signatures against the
    TemplateAPI base class of the installed lm_eval version.
    """
    def __init__(self, base_url, api_key, **kwargs):
        """Pass ``base_url`` and extra options to TemplateAPI; store the key."""
        super().__init__(base_url=base_url, **kwargs)
        self.api_key = api_key
    def _create_payload(self, messages, gen_kwargs):
        """Build the request body: messages, API key, then generation options."""
        payload = {"messages": messages, "api_key": self.api_key}
        # Applied last so generation options override the entries above
        payload.update(gen_kwargs)
        return payload
    def parse_generations(self, response):
        """Return the text of the first choice in the JSON response."""
        body = response.json()
        first_choice = body["choices"][0]
        return first_choice["text"]
    def parse_logprobs(self, response):
        """Return the response's token logprobs, or None when absent."""
        logprobs = response.json().get("logprobs")
        if not logprobs:
            # The API returned no (or empty) logprobs
            return None
        return logprobs["token_logprobs"]
 ```
 ### Register and Use
 ```python
 from lm_eval import evaluator
 from my_api import MyCustomAPI
 model = MyCustomAPI(
    base_url="https://api.example.com/v1",
    api_key="your-key"
 )
 results = evaluator.simple_evaluate(
    model=model,
    tasks=["mmlu", "gsm8k"],
    num_fewshot=5,
    batch_size="auto"
 )
 ```
 ## Comparing API and Open Models
 ### Side-by-Side Evaluation
 ```bash
 # Evaluate OpenAI GPT-4
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo \
  --tasks mmlu,gsm8k,hellaswag \
  --num_fewshot 5 \
  --output_path results/gpt4.json
 # Evaluate open Llama 2 70B
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-70b-hf,dtype=bfloat16 \
  --tasks mmlu,gsm8k,hellaswag \
  --num_fewshot 5 \
  --output_path results/llama2-70b.json
 # Compare results
 python scripts/compare_results.py \
  results/gpt4.json \
  results/llama2-70b.json
 ```
 ### Typical Comparisons
 | Model | MMLU | GSM8K | HumanEval | Cost |
 |-------|------|-------|-----------|------|
 | GPT-4 Turbo | 86.4% | 92.0% | 67.0% | $$$$ |
 | Claude 3 Opus | 86.8% | 95.0% | 84.9% | $$$$ |
 | GPT-3.5 Turbo | 70.0% | 57.1% | 48.1% | $$ |
 | Llama 2 70B | 68.9% | 56.8% | 29.9% | Free (self-host) |
 | Mixtral 8x7B | 70.6% | 58.4% | 40.2% | Free (self-host) |
 ## Best Practices
 ### Rate Limiting
 Respect API rate limits:
 ```bash
 # Lower concurrency and a longer timeout reduce rate-limit failures
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo,num_concurrent=3,timeout=120 \
  --tasks mmlu
 ```
 ### Reproducibility
 Set temperature to 0 for deterministic results:
 ```bash
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo \
  --tasks mmlu \
  --gen_kwargs temperature=0.0
 ```
 Or use `seed` for sampling:
 ```bash
 lm_eval --model anthropic-chat \
  --model_args model=claude-3-5-sonnet-20241022 \
  --tasks gsm8k \
  --gen_kwargs temperature=0.7,seed=42
 ```
 ### Caching
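 Responses are cached only when you pass `--use_cache <path>`; with it, a repeated run reuses the stored responses instead of calling the API again:
 ```bash
 # First run: makes API calls and stores the responses
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo \
  --tasks mmlu \
  --limit 100 \
  --use_cache ~/.cache/lm_eval/gpt4_turbo
 # Second run: same arguments, served from the cache (no new API calls)
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo \
  --tasks mmlu \
  --limit 100 \
  --use_cache ~/.cache/lm_eval/gpt4_turbo
 ```
 Cache location: the path prefix given to `--use_cache` (here `~/.cache/lm_eval/gpt4_turbo`)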
 ### Error Handling
 APIs can fail. Use retries:
 ```bash
 lm_eval --model openai-chat-completions \
  --model_args \
    model=gpt-4-turbo,\
    max_retries=5,\
    timeout=120 \
  --tasks mmlu
 ```
 ## Troubleshooting
 ### "Authentication failed"
 Check API key:
 ```bash
 echo $OPENAI_API_KEY  # Should print sk-...
 echo $ANTHROPIC_API_KEY  # Should print sk-ant-...
 ```
 ### "Rate limit exceeded"
 Reduce concurrency:
 ```bash
 --model_args num_concurrent=1
 ```
 Or add delays between requests.
 ### "Timeout error"
 Increase timeout:
 ```bash
 --model_args timeout=180
 ```
 ### "Model not found"
 For local APIs, verify server is running:
 ```bash
 curl http://localhost:8000/v1/models
 ```
 ### Cost Runaway
 Use `--limit` for testing:
 ```bash
 lm_eval --model openai-chat-completions \
  --model_args model=gpt-4-turbo \
  --tasks mmlu \
  --limit 50  # Only 50 samples
 ```
 ## Advanced Features
 ### Custom Headers
 ```bash
 lm_eval --model local-completions \
  --model_args \
    base_url=http://api.example.com/v1,\
    header="Authorization: Bearer token,X-Custom: value"
 ```
 ### Disable SSL Verification (Development Only)
 ```bash
 lm_eval --model local-completions \
  --model_args \
    base_url=https://localhost:8000/v1,\
    verify_certificate=false
 ```
 ### Custom Tokenizer
 ```bash
 lm_eval --model openai-chat-completions \
  --model_args \
    model=gpt-4-turbo,\
    tokenizer=gpt2,\
    tokenizer_backend=huggingface
 ```
 ## References
 - OpenAI API: https://platform.openai.com/docs/api-reference
 - Anthropic API: https://docs.anthropic.com/claude/reference
 - TemplateAPI: `lm_eval/models/api_models.py`
 - OpenAI models: `lm_eval/models/openai_completions.py`
 - Anthropic models: `lm_eval/models/anthropic_llms.py`
--- a/skills/mlops/lm-evaluation-harness/references/benchmark-guide.md
+++ b/skills/mlops/lm-evaluation-harness/references/benchmark-guide.md
@ -0,0 +1,488 @@
 # Benchmark Guide
 Complete guide to all 60+ evaluation tasks in lm-evaluation-harness, what they measure, and how to interpret results.
 ## Overview
 The lm-evaluation-harness includes 60+ benchmarks spanning:
 - Language understanding (MMLU, GLUE)
 - Mathematical reasoning (GSM8K, MATH)
 - Code generation (HumanEval, MBPP)
 - Instruction following (IFEval, AlpacaEval)
 - Long-context understanding (LongBench)
 - Multilingual capabilities (AfroBench, NorEval)
 - Reasoning (BBH, ARC)
 - Truthfulness (TruthfulQA)
 **List all tasks**:
 ```bash
 lm_eval --tasks list
 ```
 ## Major Benchmarks
 ### MMLU (Massive Multitask Language Understanding)
 **What it measures**: Broad knowledge across 57 subjects (STEM, humanities, social sciences, law).
 **Task variants**:
 - `mmlu`: Original 57-subject benchmark
 - `mmlu_pro`: More challenging version with reasoning-focused questions
 - `mmlu_prox`: Multilingual extension
 **Format**: Multiple choice (4 options)
 **Example**:
 ```
 Question: What is the capital of France?
 A. Berlin
 B. Paris
 C. London
 D. Madrid
 Answer: B
 ```
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu \
  --num_fewshot 5
 ```
 **Interpretation**:
 - Random: 25% (chance)
 - GPT-3 (175B): 43.9%
 - GPT-4: 86.4%
 - Human expert: ~90%
 **Good for**: Assessing general knowledge and domain expertise.
 ### GSM8K (Grade School Math 8K)
 **What it measures**: Mathematical reasoning on grade-school level word problems.
 **Task variants**:
 - `gsm8k`: Base task
 - `gsm8k_cot`: With chain-of-thought prompting
 - `gsm_plus`: Adversarial variant with perturbations
 **Format**: Free-form generation, extract numerical answer
 **Example**:
 ```
 Question: A baker made 200 cookies. He sold 3/5 of them in the morning and 1/4 of the remaining in the afternoon. How many cookies does he have left?
 Answer: 60
 ```
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks gsm8k \
  --num_fewshot 5
 ```
 **Interpretation**:
 - Random: ~0%
 - GPT-3 (175B): 17.0%
 - GPT-4: 92.0%
 - Llama 2 70B: 56.8%
 **Good for**: Testing multi-step reasoning and arithmetic.
 ### HumanEval
 **What it measures**: Python code generation from docstrings (functional correctness).
 **Task variants**:
 - `humaneval`: Standard benchmark
 - `humaneval_instruct`: For instruction-tuned models
 **Format**: Code generation, execution-based evaluation
 **Example**:
 ```python
 def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """
 ```
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=codellama/CodeLlama-7b-hf \
  --tasks humaneval \
  --batch_size 1
 ```
 **Interpretation**:
 - Random: 0%
 - GPT-3 (175B): 0%
 - Codex: 28.8%
 - GPT-4: 67.0%
 - Code Llama 34B: 53.7%
 **Good for**: Evaluating code generation capabilities.
 ### BBH (BIG-Bench Hard)
 **What it measures**: 23 challenging reasoning tasks where models previously failed to beat humans.
 **Categories**:
 - Logical reasoning
 - Math word problems
 - Social understanding
 - Algorithmic reasoning
 **Format**: Multiple choice and free-form
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks bbh \
  --num_fewshot 3
 ```
 **Interpretation**:
 - Random: ~25%
 - GPT-3 (175B): 33.9%
 - PaLM 540B: 58.3%
 - GPT-4: 86.7%
 **Good for**: Testing advanced reasoning capabilities.
 ### IFEval (Instruction-Following Evaluation)
 **What it measures**: Ability to follow specific, verifiable instructions.
 **Instruction types**:
 - Format constraints (e.g., "answer in 3 sentences")
 - Length constraints (e.g., "use at least 100 words")
 - Content constraints (e.g., "include the word 'banana'")
 - Structural constraints (e.g., "use bullet points")
 **Format**: Free-form generation with rule-based verification
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-chat-hf \
  --tasks ifeval \
  --batch_size auto
 ```
 **Interpretation**:
 - Measures: Instruction adherence (not quality)
 - GPT-4: 86% instruction following
 - Claude 2: 84%
 **Good for**: Evaluating chat/instruct models.
 ### GLUE (General Language Understanding Evaluation)
 **What it measures**: Natural language understanding across 9 tasks.
 **Tasks**:
 - `cola`: Grammatical acceptability
 - `sst2`: Sentiment analysis
 - `mrpc`: Paraphrase detection
 - `qqp`: Question pairs
 - `stsb`: Semantic similarity
 - `mnli`: Natural language inference
 - `qnli`: Question answering NLI
 - `rte`: Recognizing textual entailment
 - `wnli`: Winograd schemas
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=bert-base-uncased \
  --tasks glue \
  --num_fewshot 0
 ```
 **Interpretation**:
 - BERT Base: 78.3 (GLUE score)
 - RoBERTa Large: 88.5
 - Human baseline: 87.1
 **Good for**: Encoder-only models, fine-tuning baselines.
 ### LongBench
 **What it measures**: Long-context understanding (4K-32K tokens).
 **21 tasks covering**:
 - Single-document QA
 - Multi-document QA
 - Summarization
 - Few-shot learning
 - Code completion
 - Synthetic tasks
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks longbench \
  --batch_size 1
 ```
 **Interpretation**:
 - Tests context utilization
 - Many models struggle beyond 4K tokens
 - GPT-4 Turbo: 54.3%
 **Good for**: Evaluating long-context models.
 ## Additional Benchmarks
 ### TruthfulQA
 **What it measures**: Model's propensity to be truthful vs. generate plausible-sounding falsehoods.
 **Format**: Multiple choice with 4-5 options
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks truthfulqa_mc2 \
  --batch_size auto
 ```
 **Interpretation**:
 - Larger models often score worse (more convincing lies)
 - GPT-3: 58.8%
 - GPT-4: 59.0%
 - Human: ~94%
 ### ARC (AI2 Reasoning Challenge)
 **What it measures**: Grade-school science questions.
 **Variants**:
 - `arc_easy`: Easier questions
 - `arc_challenge`: Harder questions requiring reasoning
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks arc_challenge \
  --num_fewshot 25
 ```
 **Interpretation**:
 - ARC-Easy: Most models >80%
 - ARC-Challenge random: 25%
 - GPT-4: 96.3%
 ### HellaSwag
 **What it measures**: Commonsense reasoning about everyday situations.
 **Format**: Choose most plausible continuation
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks hellaswag \
  --num_fewshot 10
 ```
 **Interpretation**:
 - Random: 25%
 - GPT-3: 78.9%
 - Llama 2 70B: 85.3%
 ### WinoGrande
 **What it measures**: Commonsense reasoning via pronoun resolution.
 **Example**:
 ```
 The trophy doesn't fit in the brown suitcase because _ is too large.
 A. the trophy
 B. the suitcase
 ```
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks winogrande \
  --num_fewshot 5
 ```
 ### PIQA
 **What it measures**: Physical commonsense reasoning.
 **Example**: "To clean a keyboard, use compressed air or..."
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks piqa
 ```
 ## Multilingual Benchmarks
 ### AfroBench
 **What it measures**: Performance across 64 African languages.
 **15 tasks**: NLU, text generation, knowledge, QA, math reasoning
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks afrobench
 ```
 ### NorEval
 **What it measures**: Norwegian language understanding (9 task categories).
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=NbAiLab/nb-gpt-j-6B \
  --tasks noreval
 ```
 ## Domain-Specific Benchmarks
 ### MATH
 **What it measures**: High-school competition math problems.
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks math \
  --num_fewshot 4
 ```
 **Interpretation**:
 - Very challenging
 - GPT-4: 42.5%
 - Minerva 540B: 33.6%
 ### MBPP (Mostly Basic Python Problems)
 **What it measures**: Python programming from natural language descriptions.
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=codellama/CodeLlama-7b-hf \
  --tasks mbpp \
  --batch_size 1
 ```
 ### DROP
 **What it measures**: Reading comprehension requiring discrete reasoning.
 **Command**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks drop
 ```
 ## Benchmark Selection Guide
 ### For General Purpose Models
 Run this suite:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu,gsm8k,hellaswag,arc_challenge,truthfulqa_mc2 \
  --num_fewshot 5
 ```
 ### For Code Models
 ```bash
 lm_eval --model hf \
  --model_args pretrained=codellama/CodeLlama-7b-hf \
  --tasks humaneval,mbpp \
  --batch_size 1
 ```
 ### For Chat/Instruct Models
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-chat-hf \
  --tasks ifeval,mmlu,gsm8k_cot \
  --batch_size auto
 ```
 ### For Long Context Models
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-3.1-8B \
  --tasks longbench \
  --batch_size 1
 ```
 ## Interpreting Results
 ### Understanding Metrics
 **Accuracy**: Percentage of correct answers (most common)
 **Exact Match (EM)**: Requires exact string match (strict)
 **F1 Score**: Balances precision and recall
 **BLEU/ROUGE**: Text generation similarity
 **Pass@k**: Percentage passing when generating k samples
 ### Typical Score Ranges
 | Model Size | MMLU | GSM8K | HumanEval | HellaSwag |
 |------------|------|-------|-----------|-----------|
 | 7B | 40-50% | 10-20% | 5-15% | 70-80% |
 | 13B | 45-55% | 20-35% | 15-25% | 75-82% |
 | 70B | 60-70% | 50-65% | 35-50% | 82-87% |
 | GPT-4 | 86% | 92% | 67% | 95% |
 ### Red Flags
 - **All tasks at random chance**: Model not trained properly
 - **Exactly 0% on generation tasks**: Likely format/parsing issue
 - **Huge variance across runs**: Check seed/sampling settings
 - **Better than GPT-4 on everything**: Likely contamination
 ## Best Practices
 1. **Always report few-shot setting**: 0-shot, 5-shot, etc.
 2. **Run multiple seeds**: Report mean ± std
 3. **Check for data contamination**: Search training data for benchmark examples
 4. **Compare to published baselines**: Validate your setup
 5. **Report all hyperparameters**: Model, batch size, max tokens, temperature
 ## References
 - Task list: `lm_eval --tasks list`
 - Task README: `lm_eval/tasks/README.md`
 - Papers: See individual benchmark papers
--- a/skills/mlops/lm-evaluation-harness/references/custom-tasks.md
+++ b/skills/mlops/lm-evaluation-harness/references/custom-tasks.md
@ -0,0 +1,602 @@
 # Custom Tasks
 Complete guide to creating domain-specific evaluation tasks in lm-evaluation-harness.
 ## Overview
 Custom tasks allow you to evaluate models on your own datasets and metrics. Tasks are defined using YAML configuration files with optional Python utilities for complex logic.
 **Why create custom tasks**:
 - Evaluate on proprietary/domain-specific data
 - Test specific capabilities not covered by existing benchmarks
 - Create evaluation pipelines for internal models
 - Reproduce research experiments
 ## Quick Start
 ### Minimal Custom Task
 Create `my_tasks/simple_qa.yaml`:
 ```yaml
 task: simple_qa
 dataset_path: data/simple_qa.jsonl
 output_type: generate_until
 doc_to_text: "Question: {{question}}\nAnswer:"
 doc_to_target: "{{answer}}"
 metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
 ```
 **Run it**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks simple_qa \
  --include_path my_tasks/
 ```
 ## Task Configuration Reference
 ### Essential Fields
 ```yaml
 # Task identification
 task: my_custom_task           # Unique task name (required)
 task_alias: "My Task"          # Display name
 tag:                           # Tags for grouping
  - custom
  - domain_specific
 # Dataset configuration
 dataset_path: data/my_data.jsonl  # HuggingFace dataset or local path
 dataset_name: default             # Subset name (if applicable)
 training_split: train
 validation_split: validation
 test_split: test
 # Evaluation configuration
 output_type: generate_until    # or loglikelihood, multiple_choice
 num_fewshot: 5                 # Number of few-shot examples
 batch_size: auto               # Batch size
 # Prompt templates (Jinja2)
 doc_to_text: "Question: {{question}}"
 doc_to_target: "{{answer}}"
 # Metrics
 metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
 # Metadata
 metadata:
  version: 1.0
 ```
 ### Output Types
 **`generate_until`**: Free-form generation
 ```yaml
 output_type: generate_until
 generation_kwargs:
  max_gen_toks: 256
  until:
    - "\n"
    - "."
  temperature: 0.0
 ```
 **`loglikelihood`**: Compute log probability of targets
 ```yaml
 output_type: loglikelihood
 # Used for perplexity, classification
 ```
 **`multiple_choice`**: Choose from options
 ```yaml
 output_type: multiple_choice
 doc_to_choice: "{{choices}}"  # List of choices
 ```
 ## Data Formats
 ### Local JSONL File
 `data/my_data.jsonl`:
 ```json
 {"question": "What is 2+2?", "answer": "4"}
 {"question": "Capital of France?", "answer": "Paris"}
 ```
 **Task config**:
 ```yaml
 dataset_path: data/my_data.jsonl
 dataset_kwargs:
  data_files:
    test: data/my_data.jsonl
 ```
 ### HuggingFace Dataset
 ```yaml
 dataset_path: squad
 dataset_name: plain_text
 test_split: validation
 ```
 ### CSV File
 `data/my_data.csv`:
 ```csv
 question,answer,category
 What is 2+2?,4,math
 Capital of France?,Paris,geography
 ```
 **Task config**:
 ```yaml
 dataset_path: data/my_data.csv
 dataset_kwargs:
  data_files:
    test: data/my_data.csv
 ```
 ## Prompt Engineering
 ### Simple Template
 ```yaml
 doc_to_text: "Question: {{question}}\nAnswer:"
 doc_to_target: "{{answer}}"
 ```
 ### Conditional Logic
 ```yaml
 doc_to_text: |
  {% if context %}
  Context: {{context}}
  {% endif %}
  Question: {{question}}
  Answer:
 ```
 ### Multiple Choice
 ```yaml
 doc_to_text: |
  Question: {{question}}
  A. {{choices[0]}}
  B. {{choices[1]}}
  C. {{choices[2]}}
  D. {{choices[3]}}
  Answer:
 doc_to_target: "{{ 'ABCD'[answer_idx] }}"
 doc_to_choice: ["A", "B", "C", "D"]
 ```
 ### Few-Shot Formatting
 ```yaml
 fewshot_delimiter: "\n\n"        # Between examples
 target_delimiter: " "            # Between question and answer
 doc_to_text: "Q: {{question}}"
 doc_to_target: "A: {{answer}}"
 ```
 ## Custom Python Functions
 For complex logic, use Python functions in `utils.py`.
 ### Create `my_tasks/utils.py`
 ```python
 def process_docs(dataset):
    """Normalize every document's question via ``dataset.map``.
    Each document's ``"question"`` value is stripped of surrounding
    whitespace and lowercased in place; the result of ``dataset.map`` is
    returned unchanged.
    """
    def _normalize(record):
        trimmed = record["question"].strip()
        record["question"] = trimmed.lower()
        return record
    normalized = dataset.map(_normalize)
    return normalized
 def doc_to_text(doc):
    """Render the prompt for one document.
    A truthy ``"context"`` value is placed on its own line before the
    question; a missing or empty context is omitted entirely.
    """
    context = doc.get("context", "")
    question = doc["question"]
    return (
        f"Context: {context}\nQuestion: {question}\nAnswer:"
        if context
        else f"Question: {question}\nAnswer:"
    )
 def doc_to_target(doc):
    """Return the document's answer, stripped of outer whitespace and lowercased."""
    raw_answer = doc["answer"]
    trimmed = raw_answer.strip()
    return trimmed.lower()
 def aggregate_scores(items):
    """Return the fraction of ``items`` equal to 1.0 (0.0 for an empty input)."""
    hits = len([score for score in items if score == 1.0])
    count = len(items)
    if count > 0:
        return hits / count
    return 0.0
 ```
 ### Use in Task Config
 ```yaml
 task: my_custom_task
 dataset_path: data/my_data.jsonl
 # Use Python functions
 process_docs: !function utils.process_docs
 doc_to_text: !function utils.doc_to_text
 doc_to_target: !function utils.doc_to_target
 metric_list:
  - metric: exact_match
    aggregation: !function utils.aggregate_scores
    higher_is_better: true
 ```
 ## Real-World Examples
 ### Example 1: Domain QA Task
 **Goal**: Evaluate medical question answering.
 `medical_qa/medical_qa.yaml`:
 ```yaml
 task: medical_qa
 dataset_path: data/medical_qa.jsonl
 output_type: generate_until
 num_fewshot: 3
 doc_to_text: |
  Medical Question: {{question}}
  Context: {{context}}
  Answer (be concise):
 doc_to_target: "{{answer}}"
 generation_kwargs:
  max_gen_toks: 100
  until:
    - "\n\n"
  temperature: 0.0
 metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
  - metric: !function utils.medical_f1
    aggregation: mean
    higher_is_better: true
 filter_list:
  - name: lowercase
    filter:
      - function: lowercase
      - function: remove_whitespace
 metadata:
  version: 1.0
  domain: medical
 ```
 `medical_qa/utils.py`:
 ```python
 from sklearn.metrics import f1_score
 import re
 def medical_f1(predictions, references):
    """Compute a set-based F1 between extracted terms.
    Terms are extracted from ``predictions[0]`` and ``references[0]`` with
    ``extract_medical_terms`` and compared as sets. Returns 1.0 when both
    sets are empty and 0.0 when exactly one of them is empty.
    """
    predicted = set(extract_medical_terms(predictions[0]))
    expected = set(extract_medical_terms(references[0]))
    if not predicted and not expected:
        return 1.0
    if not (predicted and expected):
        return 0.0
    overlap = len(predicted & expected)
    spurious = len(predicted - expected)
    missed = len(expected - predicted)
    if (overlap + spurious) > 0:
        precision = overlap / (overlap + spurious)
    else:
        precision = 0
    if (overlap + missed) > 0:
        recall = overlap / (overlap + missed)
    else:
        recall = 0
    # No shared terms gives precision + recall == 0; avoid dividing by zero
    if (precision + recall) > 0:
        return 2 * (precision * recall) / (precision + recall)
    return 0
 def extract_medical_terms(text):
    """Return capitalized / CamelCase words found in ``text``, in order.
    This regex is a stand-in for real terminology extraction: it matches a
    word made of one or more capital-then-lowercase segments.
    """
    term_pattern = r'\b[A-Z][a-z]+(?:[A-Z][a-z]+)*\b'
    return [found.group(0) for found in re.finditer(term_pattern, text)]
 ```
 ### Example 2: Code Evaluation
 `code_eval/python_challenges.yaml`:
 ```yaml
 task: python_challenges
 dataset_path: data/python_problems.jsonl
 output_type: generate_until
 num_fewshot: 0
 doc_to_text: |
  Write a Python function to solve:
  {{problem_statement}}
  Function signature:
  {{function_signature}}
 doc_to_target: "{{canonical_solution}}"
 generation_kwargs:
  max_gen_toks: 512
  until:
    - "\n\nclass"
    - "\n\ndef"
  temperature: 0.2
 metric_list:
  - metric: !function utils.execute_code
    aggregation: mean
    higher_is_better: true
 process_results: !function utils.process_code_results
 metadata:
  version: 1.0
 ```
 `code_eval/utils.py`:
 ```python
 import subprocess
 import json
 def execute_code(predictions, references):
    """Score generated code against JSON-encoded test cases.
    ``references[0]`` is parsed as JSON and iterated as
    ``(input, expected_output)`` pairs. Returns 1.0 when every case matches
    and 0.0 on the first mismatch or on any exception raised while running
    the cases. JSON parsing happens outside the guarded section, so a
    malformed reference still raises.
    """
    candidate = predictions[0]
    cases = json.loads(references[0])
    try:
        # any() stops at the first failing case, like an early return
        failed = any(
            execute_with_timeout(candidate, given, timeout=5) != wanted
            for given, wanted in cases
        )
    except Exception:
        return 0.0
    return 0.0 if failed else 1.0
 def execute_with_timeout(code, input_data, timeout=5):
    """Placeholder for running ``code`` on ``input_data`` under a time limit.
    As written, this stub executes nothing and returns ``None``; replace
    the body with a real implementation before relying on its result.
    ``timeout`` defaults to 5 (presumably seconds; confirm when
    implementing).
    """
    # Implementation with subprocess and timeout
    # NOTE(review): generated code is untrusted input; run it in an isolated
    # process when implementing this.
    pass
 def process_code_results(doc, results):
    """Package a result pair into a dict.
    ``results[0]`` is compared with 1.0 to produce ``"passed"`` and
    ``results[1]`` is returned as ``"generated_code"``. ``doc`` is unused.
    """
    score = results[0]
    code = results[1]
    return dict(passed=(score == 1.0), generated_code=code)
 ```
 ### Example 3: Instruction Following
 `instruction_eval/instruction_eval.yaml`:
 ```yaml
 task: instruction_following
 dataset_path: data/instructions.jsonl
 output_type: generate_until
 num_fewshot: 0
 doc_to_text: |
  Instruction: {{instruction}}
  {% if constraints %}
  Constraints: {{constraints}}
  {% endif %}
  Response:
 doc_to_target: "{{expected_response}}"
 generation_kwargs:
  max_gen_toks: 256
  temperature: 0.7
 metric_list:
  - metric: !function utils.check_constraints
    aggregation: mean
    higher_is_better: true
  - metric: !function utils.semantic_similarity
    aggregation: mean
    higher_is_better: true
 process_docs: !function utils.add_constraint_checkers
 ```
 `instruction_eval/utils.py`:
 ```python
 from sentence_transformers import SentenceTransformer, util
 model = SentenceTransformer('all-MiniLM-L6-v2')
 def check_constraints(predictions, references):
    """Return the fraction of constraints that the response satisfies.
    ``references[0]`` is a JSON-encoded collection of constraints; each one
    is checked against ``predictions[0]`` with ``verify_constraint``. An
    empty collection counts as fully satisfied (1.0).
    """
    # Imported here because this example module's header does not import
    # json, so the original call raised NameError.
    import json
    response = predictions[0]
    constraints = json.loads(references[0])
    total = len(constraints)
    if total == 0:
        return 1.0
    satisfied = sum(
        1 for constraint in constraints if verify_constraint(response, constraint)
    )
    return satisfied / total
 def verify_constraint(response, constraint):
    """Check one constraint dict against ``response``.
    Supported ``constraint["type"]`` values:
    - ``"length"``: the response has at least ``constraint["min_words"]``
      whitespace-separated words.
    - ``"contains"``: ``constraint["keyword"]`` occurs in the response,
      ignoring case.
    Any other type is treated as satisfied and returns True.
    """
    kind = constraint["type"]
    if kind == "length":
        return len(response.split()) >= constraint["min_words"]
    if kind == "contains":
        # Lowercase both sides; lowering only the response made keywords
        # containing capital letters impossible to match.
        return constraint["keyword"].lower() in response.lower()
    # Add more constraint types
    return True
 def semantic_similarity(predictions, references):
    """Return the cosine similarity of the first prediction and reference.
    Both texts are encoded with the module-level ``model`` and the result
    of ``util.cos_sim`` is converted to a Python float.
    """
    prediction_vec = model.encode(predictions[0])
    reference_vec = model.encode(references[0])
    score = util.cos_sim(prediction_vec, reference_vec)
    return float(score)
 def add_constraint_checkers(dataset):
    """Attach a ``"parsed_constraints"`` field to every document.
    The document's ``"constraints"`` value (``""`` when missing) is passed
    to ``parse_constraints`` and the result stored on the document.
    NOTE(review): ``parse_constraints`` is not defined in this example
    module; it must be supplied before this function is used.
    """
    def _attach(record):
        raw_constraints = record.get("constraints", "")
        record["parsed_constraints"] = parse_constraints(raw_constraints)
        return record
    return dataset.map(_attach)
 ```
 ## Advanced Features
 ### Output Filtering
 ```yaml
 filter_list:
  - name: extract_answer
    filter:
      - function: regex
        regex_pattern: "Answer: (.*)"
        group: 1
      - function: lowercase
      - function: remove_whitespace
 ```
 ### Multiple Metrics
 ```yaml
 metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
  - metric: f1
    aggregation: mean
    higher_is_better: true
  - metric: bleu
    aggregation: mean
    higher_is_better: true
 ```
 ### Task Groups
 Create `my_tasks/_default.yaml`:
 ```yaml
 group: my_eval_suite
 task:
  - simple_qa
  - medical_qa
  - python_challenges
 ```
 **Run entire suite**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks my_eval_suite \
  --include_path my_tasks/
 ```
 ## Testing Your Task
 ### Validate Configuration
 ```bash
 # Test task loading
 lm_eval --tasks my_custom_task --include_path my_tasks/ --limit 0
 # Run on 5 samples
 lm_eval --model hf \
  --model_args pretrained=gpt2 \
  --tasks my_custom_task \
  --include_path my_tasks/ \
  --limit 5
 ```
 ### Debug Mode
 ```bash
 lm_eval --model hf \
  --model_args pretrained=gpt2 \
  --tasks my_custom_task \
  --include_path my_tasks/ \
  --limit 1 \
  --log_samples  # Save input/output samples
 ```
 ## Best Practices
 1. **Start simple**: Test with minimal config first
 2. **Version your tasks**: Use `metadata.version`
 3. **Document your metrics**: Explain custom metrics in comments
 4. **Test with multiple models**: Ensure robustness
 5. **Validate on known examples**: Include sanity checks
 6. **Use filters carefully**: Can hide errors
 7. **Handle edge cases**: Empty strings, missing fields
 ## Common Patterns
 ### Classification Task
 ```yaml
 output_type: loglikelihood
 doc_to_text: "Text: {{text}}\nLabel:"
 doc_to_target: " {{label}}"  # Space prefix important!
 metric_list:
  - metric: acc
    aggregation: mean
 ```
 ### Perplexity Evaluation
 ```yaml
 output_type: loglikelihood_rolling
 doc_to_text: "{{text}}"
 metric_list:
  - metric: perplexity
    aggregation: perplexity
 ```
 ### Ranking Task
 ```yaml
 output_type: loglikelihood
 doc_to_text: "Query: {{query}}\nPassage: {{passage}}\nRelevant:"
 doc_to_target: [" Yes", " No"]
 metric_list:
  - metric: acc
    aggregation: mean
 ```
 ## Troubleshooting
 **"Task not found"**: Check `--include_path` and task name
 **Empty results**: Verify `doc_to_text` and `doc_to_target` templates
 **Metric errors**: Ensure metric names are correct (exact_match, not exact-match)
 **Filter issues**: Test filters with `--log_samples`
 **Python function not found**: Check `!function module.function_name` syntax
 ## References
 - Task system: EleutherAI/lm-evaluation-harness docs
 - Example tasks: `lm_eval/tasks/` directory
 - TaskConfig: `lm_eval/api/task.py`
--- a/skills/mlops/lm-evaluation-harness/references/distributed-eval.md
+++ b/skills/mlops/lm-evaluation-harness/references/distributed-eval.md
@ -0,0 +1,519 @@
 # Distributed Evaluation
 Guide to running evaluation across multiple GPUs using data parallelism and tensor/pipeline parallelism.
 ## Overview
 Distributed evaluation speeds up benchmarking by:
 - **Data Parallelism**: Split evaluation samples across GPUs (each GPU has full model copy)
 - **Tensor Parallelism**: Split model weights across GPUs (for large models)
 - **Pipeline Parallelism**: Split model layers across GPUs (for very large models)
 **When to use**:
 - Data Parallel: Model fits on single GPU, want faster evaluation
 - Tensor/Pipeline Parallel: Model too large for single GPU
 ## HuggingFace Models (`hf`)
 ### Data Parallelism (Recommended)
 Each GPU loads a full copy of the model and processes a subset of evaluation data.
 **Single Node (8 GPUs)**:
 ```bash
 accelerate launch --multi_gpu --num_processes 8 \
  -m lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf,dtype=bfloat16 \
  --tasks mmlu,gsm8k,hellaswag \
  --batch_size 16
 ```
 **Speedup**: Near-linear (8 GPUs = ~8× faster)
 **Memory**: Each GPU needs full model (7B model ≈ 14GB × 8 = 112GB total)
 ### Tensor Parallelism (Model Sharding)
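 Split model weights across GPUs for models too large for a single GPU. Note that `parallelize=True` shards the model layer-by-layer via Accelerate's `device_map` (naive model parallelism rather than true tensor parallelism), so the GPUs process each batch one after another instead of simultaneously.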
 **Without accelerate launcher**:
 ```bash
 lm_eval --model hf \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    parallelize=True,\
    dtype=bfloat16 \
  --tasks mmlu,gsm8k \
  --batch_size 8
 ```
 **With 8 GPUs**: 70B model (140GB) / 8 = 17.5GB per GPU ✅
 **Advanced sharding**:
 ```bash
 lm_eval --model hf \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    parallelize=True,\
    device_map_option=auto,\
    max_memory_per_gpu=40GB,\
    max_cpu_memory=100GB,\
    dtype=bfloat16 \
  --tasks mmlu
 ```
 **Options**:
 - `device_map_option`: `"auto"` (default), `"balanced"`, `"balanced_low_0"`
 - `max_memory_per_gpu`: Max memory per GPU (e.g., `"40GB"`)
 - `max_cpu_memory`: Max CPU memory for offloading
 - `offload_folder`: Disk offloading directory
 ### Combined Data + Tensor Parallelism
 Use both for very large models.
 **Example: 70B model on 16 GPUs (2 copies, 8 GPUs each)**:
 ```bash
 accelerate launch --multi_gpu --num_processes 2 \
  -m lm_eval --model hf \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    parallelize=True,\
    dtype=bfloat16 \
  --tasks mmlu \
  --batch_size 8
 ```
 **Result**: 2× speedup from data parallelism, 70B model fits via tensor parallelism
 ### Configuration with `accelerate config`
 Create `~/.cache/huggingface/accelerate/default_config.yaml`:
 ```yaml
 compute_environment: LOCAL_MACHINE
 distributed_type: MULTI_GPU
 num_machines: 1
 num_processes: 8
 gpu_ids: all
 mixed_precision: bf16
 ```
 **Then run**:
 ```bash
 accelerate launch -m lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu
 ```
 ## vLLM Models (`vllm`)
 vLLM provides highly optimized distributed inference.
 ### Tensor Parallelism
 **Single Node (4 GPUs)**:
 ```bash
 lm_eval --model vllm \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    tensor_parallel_size=4,\
    dtype=auto,\
    gpu_memory_utilization=0.9 \
  --tasks mmlu,gsm8k \
  --batch_size auto
 ```
 **Memory**: 70B model split across 4 GPUs = ~35GB per GPU
 ### Data Parallelism
 **Multiple model replicas**:
 ```bash
 lm_eval --model vllm \
  --model_args \
    pretrained=meta-llama/Llama-2-7b-hf,\
    data_parallel_size=4,\
    dtype=auto,\
    gpu_memory_utilization=0.8 \
  --tasks hellaswag,arc_challenge \
  --batch_size auto
 ```
 **Result**: 4 model replicas = 4× throughput
 ### Combined Tensor + Data Parallelism
 **Example: 8 GPUs = 4 TP × 2 DP**:
 ```bash
 lm_eval --model vllm \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    tensor_parallel_size=4,\
    data_parallel_size=2,\
    dtype=auto,\
    gpu_memory_utilization=0.85 \
  --tasks mmlu \
  --batch_size auto
 ```
 **Result**: 70B model fits (TP=4), 2× speedup (DP=2)
 ### Multi-Node vLLM
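 vLLM does not span multiple nodes on its own; it relies on Ray for multi-node inference. Start a Ray cluster first (head node shown below; worker nodes join it with `ray start --address=<head-ip>:6379`), then launch the evaluation: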
 ```bash
 # Start Ray cluster
 ray start --head --port=6379
 # Run evaluation
 lm_eval --model vllm \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    tensor_parallel_size=8,\
    dtype=auto \
  --tasks mmlu
 ```
 ## NVIDIA NeMo Models (`nemo_lm`)
 ### Data Replication
 **8 replicas on 8 GPUs**:
 ```bash
 torchrun --nproc-per-node=8 --no-python \
  lm_eval --model nemo_lm \
  --model_args \
    path=/path/to/model.nemo,\
    devices=8 \
  --tasks hellaswag,arc_challenge \
  --batch_size 32
 ```
 **Speedup**: Near-linear (8× faster)
 ### Tensor Parallelism
 **4-way tensor parallelism**:
 ```bash
 torchrun --nproc-per-node=4 --no-python \
  lm_eval --model nemo_lm \
  --model_args \
    path=/path/to/70b_model.nemo,\
    devices=4,\
    tensor_model_parallel_size=4 \
  --tasks mmlu,gsm8k \
  --batch_size 16
 ```
 ### Pipeline Parallelism
 **2 TP × 2 PP on 4 GPUs**:
 ```bash
 torchrun --nproc-per-node=4 --no-python \
  lm_eval --model nemo_lm \
  --model_args \
    path=/path/to/model.nemo,\
    devices=4,\
    tensor_model_parallel_size=2,\
    pipeline_model_parallel_size=2 \
  --tasks mmlu \
  --batch_size 8
 ```
 **Constraint**: `devices = TP × PP`
 ### Multi-Node NeMo
 Currently not supported by lm-evaluation-harness.
 ## SGLang Models (`sglang`)
 ### Tensor Parallelism
 ```bash
 lm_eval --model sglang \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    tp_size=4,\
    dtype=auto \
  --tasks gsm8k \
  --batch_size auto
 ```
 ### Data Parallelism (Deprecated)
 **Note**: SGLang is deprecating data parallelism. Use tensor parallelism instead.
 ```bash
 lm_eval --model sglang \
  --model_args \
    pretrained=meta-llama/Llama-2-7b-hf,\
    dp_size=4,\
    dtype=auto \
  --tasks mmlu
 ```
 ## Performance Comparison
 ### 70B Model Evaluation (MMLU, 5-shot)
 | Method | GPUs | Time | Memory/GPU | Notes |
 |--------|------|------|------------|-------|
 | HF (no parallel) | 1 | N/A (~8 hours if it fit) | 140GB (OOM) | Won't fit |
 | HF (TP=8) | 8 | 2 hours | 17.5GB | Slower, fits |
 | HF (DP=8) | 8 | N/A (~1 hour if it fit) | 140GB (OOM) | Won't fit |
 | vLLM (TP=4) | 4 | 30 min | 35GB | Fast! |
 | vLLM (TP=4, DP=2) | 8 | 15 min | 35GB | Fastest |
 ### 7B Model Evaluation (Multiple Tasks)
 | Method | GPUs | Time | Speedup |
 |--------|------|------|---------|
 | HF (single) | 1 | 4 hours | 1× |
 | HF (DP=4) | 4 | 1 hour | 4× |
 | HF (DP=8) | 8 | 30 min | 8× |
 | vLLM (DP=8) | 8 | 15 min | 16× |
 **Takeaway**: vLLM is significantly faster than HuggingFace for inference.
 ## Choosing Parallelism Strategy
 ### Decision Tree
 ```
 Model fits on single GPU?
 ├─ YES: Use data parallelism
 │   ├─ HF: accelerate launch --multi_gpu --num_processes N
 │   └─ vLLM: data_parallel_size=N (fastest)
 │
 └─ NO: Use tensor/pipeline parallelism
    ├─ Model < 70B:
    │   └─ vLLM: tensor_parallel_size=4
    ├─ Model 70-175B:
    │   ├─ vLLM: tensor_parallel_size=8
    │   └─ Or HF: parallelize=True
    └─ Model > 175B:
        └─ Contact framework authors
 ```
 ### Memory Estimation
 **Rule of thumb**:
 ```
 Memory (GB) = Parameters (B) × Precision (bytes) × 1.2 (overhead)
 ```
 **Examples**:
 - 7B FP16: 7 × 2 × 1.2 = 16.8GB ✅ Fits A100 40GB
 - 13B FP16: 13 × 2 × 1.2 = 31.2GB ✅ Fits A100 40GB
 - 70B FP16: 70 × 2 × 1.2 = 168GB ❌ Need TP=4 or TP=8
 - 70B BF16: 70 × 2 × 1.2 = 168GB (same as FP16)
 **With tensor parallelism**:
 ```
 Memory per GPU = Total Memory / TP
 ```
 - 70B on 4 GPUs: 168GB / 4 = 42GB per GPU ✅ (requires 80GB GPUs; does not fit an A100 40GB)
 - 70B on 8 GPUs: 168GB / 8 = 21GB per GPU ✅
 ## Multi-Node Evaluation
 ### HuggingFace with SLURM
 **Submit job**:
 ```bash
 #!/bin/bash
 #SBATCH --nodes=4
 #SBATCH --gpus-per-node=8
 #SBATCH --ntasks-per-node=1
 srun accelerate launch --multi_gpu \
  --num_processes $((SLURM_NNODES * 8)) \
  -m lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu,gsm8k,hellaswag \
  --batch_size 16
 ```
 **Submit**:
 ```bash
 sbatch eval_job.sh
 ```
 ### Manual Multi-Node Setup
 **On each node, run**:
 ```bash
 accelerate launch \
  --multi_gpu \
  --num_machines 4 \
  --num_processes 32 \
  --main_process_ip $MASTER_IP \
  --main_process_port 29500 \
  --machine_rank $NODE_RANK \
  -m lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu
 ```
 **Environment variables**:
 - `MASTER_IP`: IP of rank 0 node
 - `NODE_RANK`: 0, 1, 2, 3 for each node
 ## Best Practices
 ### 1. Start Small
 Test on small sample first:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-70b-hf,parallelize=True \
  --tasks mmlu \
  --limit 100  # Just 100 samples
 ```
 ### 2. Monitor GPU Usage
 ```bash
 # Terminal 1: Run evaluation
 lm_eval --model hf ...
 # Terminal 2: Monitor
 watch -n 1 nvidia-smi
 ```
 Look for:
 - GPU utilization > 90%
 - Memory usage stable
 - All GPUs active
 ### 3. Optimize Batch Size
 ```bash
 # Auto batch size (recommended)
 --batch_size auto
 # Or tune manually
 --batch_size 16  # Start here
 --batch_size 32  # Increase if memory allows
 ```
 ### 4. Use Mixed Precision
 ```bash
 --model_args dtype=bfloat16  # Faster, less memory
 ```
 ### 5. Check Communication
 For multi-GPU and multi-node runs, check how the GPUs (and network adapters) are interconnected:
 ```bash
 # Should see InfiniBand or high-speed network
 nvidia-smi topo -m
 ```
 ## Troubleshooting
 ### "CUDA out of memory"
 **Solutions**:
 1. Increase tensor parallelism:
   ```bash
   --model_args tensor_parallel_size=8  # Was 4
   ```
 2. Reduce batch size:
   ```bash
   --batch_size 4  # Was 16
   ```
 3. Lower precision:
   ```bash
   --model_args dtype=int8  # Quantization
   ```
 ### "NCCL error" or Hanging
 **Check**:
 1. All GPUs visible: `nvidia-smi`
 2. NCCL installed: `python -c "import torch; print(torch.cuda.nccl.version())"`
 3. Network connectivity between nodes
 **Fix**:
 ```bash
 export NCCL_DEBUG=INFO  # Enable debug logging
 export NCCL_IB_DISABLE=0  # Use InfiniBand if available
 ```
 ### Slow Evaluation
 **Possible causes**:
 1. **Data loading bottleneck**: Preprocess dataset
 2. **Low GPU utilization**: Increase batch size
 3. **Communication overhead**: Reduce parallelism degree
 **Profile**:
 ```bash
 lm_eval --model hf \
  --model_args pretrained=meta-llama/Llama-2-7b-hf \
  --tasks mmlu \
  --limit 100 \
  --log_samples  # Check timing
 ```
 ### GPUs Imbalanced
 **Symptom**: GPU 0 at 100%, others at 50%
 **Solution**: Use `device_map_option=balanced`:
 ```bash
 --model_args parallelize=True,device_map_option=balanced
 ```
 ## Example Configurations
 ### Small Model (7B) - Fast Evaluation
 ```bash
 # 8 A100s, data parallel
 accelerate launch --multi_gpu --num_processes 8 \
  -m lm_eval --model hf \
  --model_args \
    pretrained=meta-llama/Llama-2-7b-hf,\
    dtype=bfloat16 \
  --tasks mmlu,gsm8k,hellaswag,arc_challenge \
  --num_fewshot 5 \
  --batch_size 32
 # Time: ~30 minutes
 ```
 ### Large Model (70B) - vLLM
 ```bash
 # 8 H100s, tensor parallel
 lm_eval --model vllm \
  --model_args \
    pretrained=meta-llama/Llama-2-70b-hf,\
    tensor_parallel_size=8,\
    dtype=auto,\
    gpu_memory_utilization=0.9 \
  --tasks mmlu,gsm8k,humaneval \
  --num_fewshot 5 \
  --batch_size auto
 # Time: ~1 hour
 ```
 ### Very Large Model (175B+)
 **Requires specialized setup - contact framework maintainers**
 ## References
 - HuggingFace Accelerate: https://huggingface.co/docs/accelerate/
 - vLLM docs: https://docs.vllm.ai/
 - NeMo docs: https://docs.nvidia.com/nemo-framework/
 - lm-eval distributed guide: `docs/model_guide.md`
--- a/skills/mlops/ml-paper-writing/SKILL.md
+++ b/skills/mlops/ml-paper-writing/SKILL.md
@ -0,0 +1,937 @@
 ---
 name: ml-paper-writing
 description: Write publication-ready ML/AI papers for NeurIPS, ICML, ICLR, ACL, AAAI, COLM. Use when drafting papers from research repos, structuring arguments, verifying citations, or preparing camera-ready submissions. Includes LaTeX templates, reviewer guidelines, and citation verification workflows.
 version: 1.0.0
 author: Orchestra Research
 license: MIT
 tags: [Academic Writing, NeurIPS, ICML, ICLR, ACL, AAAI, COLM, LaTeX, Paper Writing, Citations, Research]
 dependencies: [semanticscholar, arxiv, habanero, requests]
 ---
 # ML Paper Writing for Top AI Conferences
 Expert-level guidance for writing publication-ready papers targeting **NeurIPS, ICML, ICLR, ACL, AAAI, and COLM**. This skill combines writing philosophy from top researchers (Nanda, Farquhar, Karpathy, Lipton, Steinhardt) with practical tools: LaTeX templates, citation verification APIs, and conference checklists.
 ## Core Philosophy: Collaborative Writing
 **Paper writing is collaborative, but Claude should be proactive in delivering drafts.**
 The typical workflow starts with a research repository containing code, results, and experimental artifacts. Claude's role is to:
 1. **Understand the project** by exploring the repo, results, and existing documentation
 2. **Deliver a complete first draft** when confident about the contribution
 3. **Search literature** using web search and APIs to find relevant citations
 4. **Refine through feedback cycles** when the scientist provides input
 5. **Ask for clarification** only when genuinely uncertain about key decisions
 **Key Principle**: Be proactive. If the repo and results are clear, deliver a full draft. Don't block waiting for feedback on every section—scientists are busy. Produce something concrete they can react to, then iterate based on their response.
 ---
 ## ⚠️ CRITICAL: Never Hallucinate Citations
 **This is the most important rule in academic writing with AI assistance.**
 ### The Problem
 AI-generated citations have a **~40% error rate**. Hallucinated references—papers that don't exist, wrong authors, incorrect years, fabricated DOIs—are a serious form of academic misconduct that can result in desk rejection or retraction.
 ### The Rule
 **NEVER generate BibTeX entries from memory. ALWAYS fetch programmatically.**
 | Action | ✅ Correct | ❌ Wrong |
 |--------|-----------|----------|
 | Adding a citation | Search API → verify → fetch BibTeX | Write BibTeX from memory |
 | Uncertain about a paper | Mark as `[CITATION NEEDED]` | Guess the reference |
 | Can't find exact paper | Note: "placeholder - verify" | Invent similar-sounding paper |
 ### When You Can't Verify a Citation
 If you cannot programmatically verify a citation, you MUST:
 ```latex
 % EXPLICIT PLACEHOLDER - requires human verification
 \cite{PLACEHOLDER_author2024_verify_this}  % TODO: Verify this citation exists
 ```
 **Always tell the scientist**: "I've marked [X] citations as placeholders that need verification. I could not confirm these papers exist."
 ### Recommended: Install Exa MCP for Paper Search
 For the best paper search experience, install **Exa MCP** which provides real-time academic search:
 **Claude Code:**
 ```bash
 claude mcp add exa -- npx -y mcp-remote "https://mcp.exa.ai/mcp"
 ```
 **Cursor / VS Code** (add to MCP settings):
 ```json
 {
  "mcpServers": {
    "exa": {
      "type": "http",
      "url": "https://mcp.exa.ai/mcp"
    }
  }
 }
 ```
 Exa MCP enables searches like:
 - "Find papers on RLHF for language models published after 2023"
 - "Search for transformer architecture papers by Vaswani"
 - "Get recent work on sparse autoencoders for interpretability"
 Then verify results with Semantic Scholar API and fetch BibTeX via DOI.
 ---
 ## Workflow 0: Starting from a Research Repository
 When beginning paper writing, start by understanding the project:
 ```
 Project Understanding:
 - [ ] Step 1: Explore the repository structure
 - [ ] Step 2: Read README, existing docs, and key results
 - [ ] Step 3: Identify the main contribution with the scientist
 - [ ] Step 4: Find papers already cited in the codebase
 - [ ] Step 5: Search for additional relevant literature
 - [ ] Step 6: Outline the paper structure together
 - [ ] Step 7: Draft sections iteratively with feedback
 ```
 **Step 1: Explore the Repository**
 ```bash
 # Understand project structure
 ls -la
 find . -name "*.py" | head -20
 find . -name "*.md" -o -name "*.txt" | xargs grep -l -i "result\|conclusion\|finding"
 ```
 Look for:
 - `README.md` - Project overview and claims
 - `results/`, `outputs/`, `experiments/` - Key findings
 - `configs/` - Experimental settings
 - Existing `.bib` files or citation references
 - Any draft documents or notes
 **Step 2: Identify Existing Citations**
 Check for papers already referenced in the codebase:
 ```bash
 # Find existing citations
 grep -r "arxiv\|doi\|cite" --include="*.md" --include="*.bib" --include="*.py"
 find . -name "*.bib"
 ```
 These are high-signal starting points for Related Work—the scientist has already deemed them relevant.
 **Step 3: Clarify the Contribution**
 Before writing, explicitly confirm with the scientist:
 > "Based on my understanding of the repo, the main contribution appears to be [X].
 > The key results show [Y]. Is this the framing you want for the paper,
 > or should we emphasize different aspects?"
 **Never assume the narrative—always verify with the human.**
 **Step 4: Search for Additional Literature**
 Use web search to find relevant papers:
 ```
 Search queries to try:
 - "[main technique] + [application domain]"
 - "[baseline method] comparison"
 - "[problem name] state-of-the-art"
 - Author names from existing citations
 ```
 Then verify and retrieve BibTeX using the citation workflow below.
 **Step 5: Deliver a First Draft**
 **Be proactive—deliver a complete draft rather than asking permission for each section.**
 If the repo provides clear results and the contribution is apparent:
 1. Write the full first draft end-to-end
 2. Present the complete draft for feedback
 3. Iterate based on scientist's response
 If genuinely uncertain about framing or major claims:
 1. Draft what you can confidently
 2. Flag specific uncertainties: "I framed X as the main contribution—let me know if you'd prefer to emphasize Y instead"
 3. Continue with the draft rather than blocking
 **Questions to include with the draft** (not before):
 - "I emphasized X as the main contribution—adjust if needed"
 - "I highlighted results A, B, C—let me know if others are more important"
 - "Related work section includes [papers]—add any I missed"
 ---
 ## When to Use This Skill
 Use this skill when:
 - **Starting from a research repo** to write a paper
 - **Drafting or revising** specific sections
 - **Finding and verifying citations** for related work
 - **Formatting** for conference submission
 - **Resubmitting** to a different venue (format conversion)
 - **Iterating** on drafts with scientist feedback
 **Always remember**: First drafts are starting points for discussion, not final outputs.
 ---
 ## Balancing Proactivity and Collaboration
 **Default: Be proactive. Deliver drafts, then iterate.**
 | Confidence Level | Action |
 |-----------------|--------|
 | **High** (clear repo, obvious contribution) | Write full draft, deliver, iterate on feedback |
 | **Medium** (some ambiguity) | Write draft with flagged uncertainties, continue |
 | **Low** (major unknowns) | Ask 1-2 targeted questions, then draft |
 **Draft first, ask with the draft** (not before):
 | Section | Draft Autonomously | Flag With Draft |
 |---------|-------------------|-----------------|
 | Abstract | Yes | "Framed contribution as X—adjust if needed" |
 | Introduction | Yes | "Emphasized problem Y—correct if wrong" |
 | Methods | Yes | "Included details A, B, C—add missing pieces" |
 | Experiments | Yes | "Highlighted results 1, 2, 3—reorder if needed" |
 | Related Work | Yes | "Cited papers X, Y, Z—add any I missed" |
 **Only block for input when:**
 - Target venue is unclear (affects page limits, framing)
 - Multiple contradictory framings seem equally valid
 - Results seem incomplete or inconsistent
 - Explicit request to review before continuing
 **Don't block for:**
 - Word choice decisions
 - Section ordering
 - Which specific results to show (make a choice, flag it)
 - Citation completeness (draft with what you find, note gaps)
 ---
 ## The Narrative Principle
 **The single most critical insight**: Your paper is not a collection of experiments—it's a story with one clear contribution supported by evidence.
 Every successful ML paper centers on what Neel Nanda calls "the narrative": a short, rigorous, evidence-based technical story with a takeaway readers care about.
 **Three Pillars (must be crystal clear by end of introduction):**
 | Pillar | Description | Example |
 |--------|-------------|---------|
 | **The What** | 1-3 specific novel claims within a cohesive theme | "We prove that X achieves Y under condition Z" |
 | **The Why** | Rigorous empirical evidence supporting claims | Strong baselines, experiments distinguishing hypotheses |
 | **The So What** | Why readers should care | Connection to recognized community problems |
 **If you cannot state your contribution in one sentence, you don't yet have a paper.**
 ---
 ## Paper Structure Workflow
 ### Workflow 1: Writing a Complete Paper (Iterative)
 Copy this checklist and track progress. **Each step involves drafting → feedback → revision:**
 ```
 Paper Writing Progress:
 - [ ] Step 1: Define the one-sentence contribution (with scientist)
 - [ ] Step 2: Draft Figure 1 → get feedback → revise
 - [ ] Step 3: Draft abstract → get feedback → revise
 - [ ] Step 4: Draft introduction → get feedback → revise
 - [ ] Step 5: Draft methods → get feedback → revise
 - [ ] Step 6: Draft experiments → get feedback → revise
 - [ ] Step 7: Draft related work → get feedback → revise
 - [ ] Step 8: Draft limitations → get feedback → revise
 - [ ] Step 9: Complete paper checklist (required)
 - [ ] Step 10: Final review cycle and submission
 ```
 **Step 1: Define the One-Sentence Contribution**
 **This step requires explicit confirmation from the scientist.**
 Before writing anything, articulate and verify:
 - What is the single thing your paper contributes?
 - What was not obvious or present before your work?
 > "I propose framing the contribution as: '[one sentence]'. Does this capture
 > what you see as the main takeaway? Should we adjust the emphasis?"
 **Step 2: Draft Figure 1**
 Figure 1 deserves special attention—many readers skip directly to it.
 - Convey core idea, approach, or most compelling result
 - Use vector graphics (PDF/EPS for plots)
 - Write captions that stand alone without main text
 - Ensure readability in black-and-white (8% of men have color vision deficiency)
 **Step 3: Write Abstract (5-Sentence Formula)**
 From Sebastian Farquhar (DeepMind):
 ```
 1. What you achieved: "We introduce...", "We prove...", "We demonstrate..."
 2. Why this is hard and important
 3. How you do it (with specialist keywords for discoverability)
 4. What evidence you have
 5. Your most remarkable number/result
 ```
 **Delete** generic openings like "Large language models have achieved remarkable success..."
 **Step 4: Write Introduction (1-1.5 pages max)**
 Must include:
 - 2-4 bullet contribution list (max 1-2 lines each in two-column format)
 - Clear problem statement
 - Brief approach overview
 - Methods should start by page 2-3 maximum
 **Step 5: Methods Section**
 Enable reimplementation:
 - Conceptual outline or pseudocode
 - All hyperparameters listed
 - Architectural details sufficient for reproduction
 - Present final design decisions; ablations go in experiments
 **Step 6: Experiments Section**
 For each experiment, explicitly state:
 - What claim it supports
 - How it connects to main contribution
 - Experimental setting (details in appendix)
 - What to observe: "the blue line shows X, which demonstrates Y"
 Requirements:
 - Error bars with methodology (standard deviation vs standard error)
 - Hyperparameter search ranges
 - Compute infrastructure (GPU type, total hours)
 - Seed-setting methods
 **Step 7: Related Work**
 Organize methodologically, not paper-by-paper:
 **Good:** "One line of work uses Floogledoodle's assumption [refs] whereas we use Doobersnoddle's assumption because..."
 **Bad:** "Snap et al. introduced X while Crackle et al. introduced Y."
 Cite generously—reviewers likely authored relevant papers.
 **Step 8: Limitations Section (REQUIRED)**
 All major conferences require this. Counter-intuitively, honesty helps:
 - Reviewers are instructed not to penalize honest limitation acknowledgment
 - Pre-empt criticisms by identifying weaknesses first
 - Explain why limitations don't undermine core claims
 **Step 9: Paper Checklist**
 NeurIPS, ICML, and ICLR all require paper checklists. See [references/checklists.md](references/checklists.md).
 ---
 ## Writing Philosophy for Top ML Conferences
 **This section distills the most important writing principles from leading ML researchers.** These aren't optional style suggestions—they're what separates accepted papers from rejected ones.
 > "A paper is a short, rigorous, evidence-based technical story with a takeaway readers care about." — Neel Nanda
 ### The Sources Behind This Guidance
 This skill synthesizes writing philosophy from researchers who have published extensively at top venues:
 | Source | Key Contribution | Link |
 |--------|-----------------|------|
 | **Neel Nanda** (Google DeepMind) | The Narrative Principle, What/Why/So What framework | [How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) |
 | **Sebastian Farquhar** (DeepMind) | 5-sentence abstract formula | [How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) |
 | **Gopen & Swan** | 7 principles of reader expectations | [Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) |
 | **Zachary Lipton** | Word choice, eliminating hedging | [Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) |
 | **Jacob Steinhardt** (UC Berkeley) | Precision, consistent terminology | [Writing Tips](https://bounded-regret.ghost.io/) |
 | **Ethan Perez** (Anthropic) | Micro-level clarity tips | [Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) |
 | **Andrej Karpathy** | Single contribution focus | Various lectures |
 **For deeper dives into any of these, see:**
 - [references/writing-guide.md](references/writing-guide.md) - Full explanations with examples
 - [references/sources.md](references/sources.md) - Complete bibliography
 ### Time Allocation (From Neel Nanda)
 Spend approximately **equal time** on each of:
 1. The abstract
 2. The introduction
 3. The figures
 4. Everything else combined
 **Why?** Most reviewers form judgments before reaching your methods. Readers encounter your paper as: **title → abstract → introduction → figures → maybe the rest.**
 ### Writing Style Guidelines
 #### Sentence-Level Clarity (Gopen & Swan's 7 Principles)
 These principles are based on how readers actually process prose. Violating them forces readers to spend cognitive effort on structure rather than content.
 | Principle | Rule | Example |
 |-----------|------|---------|
 | **Subject-verb proximity** | Keep subject and verb close | ❌ "The model, which was trained on..., achieves" → ✅ "The model achieves... after training on..." |
 | **Stress position** | Place emphasis at sentence ends | ❌ "Accuracy improves by 15% when using attention" → ✅ "When using attention, accuracy improves by **15%**" |
 | **Topic position** | Put context first, new info after | ✅ "Given these constraints, we propose..." |
 | **Old before new** | Familiar info → unfamiliar info | Link backward, then introduce new |
 | **One unit, one function** | Each paragraph makes one point | Split multi-point paragraphs |
 | **Action in verb** | Use verbs, not nominalizations | ❌ "We performed an analysis" → ✅ "We analyzed" |
 | **Context before new** | Set stage before presenting | Explain before showing equation |
 **Full 7 principles with detailed examples:** See [references/writing-guide.md](references/writing-guide.md#the-7-principles-of-reader-expectations)
 #### Micro-Level Tips (Ethan Perez)
 These small changes accumulate into significantly clearer prose:
 - **Minimize pronouns**: ❌ "This shows..." → ✅ "This result shows..."
 - **Verbs early**: Position verbs near sentence start
 - **Unfold apostrophes**: ❌ "X's Y" → ✅ "The Y of X" (when awkward)
 - **Delete filler words**: "actually," "a bit," "very," "really," "basically," "quite," "essentially"
 **Full micro-tips with examples:** See [references/writing-guide.md](references/writing-guide.md#micro-level-writing-tips)
 #### Word Choice (Zachary Lipton)
 - **Be specific**: ❌ "performance" → ✅ "accuracy" or "latency" (say what you mean)
 - **Eliminate hedging**: Drop "may" and "can" unless genuinely uncertain
 - **Avoid incremental vocabulary**: ❌ "combine," "modify," "expand" → ✅ "develop," "propose," "introduce"
 - **Delete intensifiers**: ❌ "provides *very* tight approximation" → ✅ "provides tight approximation"
 #### Precision Over Brevity (Jacob Steinhardt)
 - **Consistent terminology**: Using different terms for the same concept creates confusion. Pick one and stick with it.
 - **State assumptions formally**: Before theorems, list all assumptions explicitly
 - **Intuition + rigor**: Provide intuitive explanations alongside formal proofs
 ### What Reviewers Actually Read
 Understanding reviewer behavior helps prioritize your effort:
 | Paper Section | % Reviewers Who Read | Implication |
 |---------------|---------------------|-------------|
 | Abstract | 100% | Must be perfect |
 | Introduction | 90%+ (skimmed) | Front-load contribution |
 | Figures | Examined before methods | Figure 1 is critical |
 | Methods | Only if interested | Don't bury the lede |
 | Appendix | Rarely | Put only supplementary details |
 **Bottom line**: If your abstract and intro don't hook reviewers, they may never read your brilliant methods section.
 ---
 ## Conference Requirements Quick Reference
 | Conference | Page Limit | Extra for Camera-Ready | Key Requirement |
 |------------|------------|------------------------|-----------------|
 | **NeurIPS 2025** | 9 pages | +0 | Mandatory checklist, lay summary for accepted |
 | **ICML 2026** | 8 pages | +1 | Broader Impact Statement required |
 | **ICLR 2026** | 9 pages | +1 | LLM disclosure required, reciprocal reviewing |
 | **ACL 2025** | 8 pages (long) | varies | Limitations section mandatory |
 | **AAAI 2026** | 7 pages | +1 | Strict style file adherence |
 | **COLM 2025** | 9 pages | +1 | Focus on language models |
 **Universal Requirements:**
 - Double-blind review (anonymize submissions)
 - References don't count toward page limit
 - Appendices unlimited but reviewers not required to read
 - LaTeX required for all venues
 **LaTeX Templates:** See [templates/](templates/) directory for all conference templates.
 ---
 ## Using LaTeX Templates Properly
 ### Workflow 4: Starting a New Paper from Template
 **Always copy the entire template directory first, then write within it.**
 ```
 Template Setup Checklist:
 - [ ] Step 1: Copy entire template directory to new project
 - [ ] Step 2: Verify template compiles as-is (before any changes)
 - [ ] Step 3: Read the template's example content to understand structure
 - [ ] Step 4: Replace example content section by section
 - [ ] Step 5: Keep template comments/examples as reference until done
 - [ ] Step 6: Clean up template artifacts only at the end
 ```
 **Step 1: Copy the Full Template**
 ```bash
 # Create your paper directory with the complete template
 cp -r templates/neurips2025/ ~/papers/my-new-paper/
 cd ~/papers/my-new-paper/
 # Verify structure is complete
 ls -la
 # Should see: main.tex, neurips.sty, Makefile, etc.
 ```
 **⚠️ IMPORTANT**: Copy the ENTIRE directory, not just `main.tex`. Templates include:
 - Style files (`.sty`) - required for compilation
 - Bibliography styles (`.bst`) - required for references
 - Example content - useful as reference
 - Makefiles - for easy compilation
 **Step 2: Verify Template Compiles First**
 Before making ANY changes, compile the template as-is:
 ```bash
 # Using latexmk (recommended)
 latexmk -pdf main.tex
 # Or manual compilation
 pdflatex main.tex
 bibtex main
 pdflatex main.tex
 pdflatex main.tex
 ```
 If the unmodified template doesn't compile, fix that first. Common issues:
 - Missing TeX packages → install via `tlmgr install <package>`
 - Wrong TeX distribution → use TeX Live (recommended)
 **Step 3: Keep Template Content as Reference**
 Don't immediately delete all example content. Instead:
 ```latex
 % KEEP template examples commented out as you write
 % This shows you the expected format
 % Template example (keep for reference):
 % \begin{figure}[t]
 %   \centering
 %   \includegraphics[width=0.8\linewidth]{example-image}
 %   \caption{Template shows caption style}
 % \end{figure}
 % Your actual figure:
 \begin{figure}[t]
  \centering
  \includegraphics[width=0.8\linewidth]{your-figure.pdf}
  \caption{Your caption following the same style.}
 \end{figure}
 ```
 **Step 4: Replace Content Section by Section**
 Work through the paper systematically:
 ```
 Replacement Order:
 1. Title and authors (anonymize for submission)
 2. Abstract
 3. Introduction
 4. Methods
 5. Experiments
 6. Related Work
 7. Conclusion
 8. References (your .bib file)
 9. Appendix
 ```
 For each section:
 1. Read the template's example content
 2. Note any special formatting or macros used
 3. Replace with your content following the same patterns
 4. Compile frequently to catch errors early
 **Step 5: Use Template Macros**
 Templates often define useful macros. Check the preamble for:
 ```latex
 % Common template macros to use:
 \newcommand{\method}{YourMethodName}  % Consistent method naming
 \newcommand{\eg}{e.g.,\xspace}        % Proper abbreviations
 \newcommand{\ie}{i.e.,\xspace}
 \newcommand{\etal}{\textit{et al.}\xspace}
 ```
 **Step 6: Clean Up Only at the End**
 Only remove template artifacts when paper is nearly complete:
 ```latex
 % BEFORE SUBMISSION - remove these:
 % - Commented-out template examples
 % - Unused packages
 % - Template's example figures/tables
 % - Lorem ipsum or placeholder text
 % KEEP these:
 % - All style files (.sty)
 % - Bibliography style (.bst)
 % - Required packages from template
 % - Any custom macros you're using
 ```
 ### Template Pitfalls to Avoid
 | Pitfall | Problem | Solution |
 |---------|---------|----------|
 | Copying only `main.tex` | Missing `.sty`, won't compile | Copy entire directory |
 | Modifying `.sty` files | Breaks conference formatting | Never edit style files |
 | Adding random packages | Conflicts, breaks template | Only add if necessary |
 | Deleting template content too early | Lose formatting reference | Keep as comments until done |
 | Not compiling frequently | Errors accumulate | Compile after each section |
 ### Quick Template Reference
 | Conference | Main File | Key Style File | Notes |
 |------------|-----------|----------------|-------|
 | NeurIPS 2025 | `main.tex` | `neurips.sty` | Has Makefile |
 | ICML 2026 | `example_paper.tex` | `icml2026.sty` | Includes algorithm packages |
 | ICLR 2026 | `iclr2026_conference.tex` | `iclr2026_conference.sty` | Has math_commands.tex |
 | ACL | `acl_latex.tex` | `acl.sty` | Strict formatting |
 | AAAI 2026 | `aaai2026-unified-template.tex` | `aaai2026.sty` | Very strict compliance |
 | COLM 2025 | `colm2025_conference.tex` | `colm2025_conference.sty` | Similar to ICLR |
 ---
 ## Conference Resubmission & Format Conversion
 When a paper is rejected or withdrawn from one venue and resubmitted to another, format conversion is required. This is a common workflow in ML research.
 ### Workflow 3: Converting Between Conference Formats
 ```
 Format Conversion Checklist:
 - [ ] Step 1: Identify source and target template differences
 - [ ] Step 2: Start from the target template and migrate content only (not the preamble)
 - [ ] Step 3: Adjust content for the new page limit
 - [ ] Step 4: Add conference-specific required sections
 - [ ] Step 5: Update references
 - [ ] Step 6: Address previous reviews (without mentioning them)
 - [ ] Finally: Verify compilation and formatting
 ```
 **Step 1: Key Template Differences**
 | From → To | Page Change | Key Adjustments |
 |-----------|-------------|-----------------|
 | NeurIPS → ICML | 9 → 8 pages | Cut 1 page, add Broader Impact if missing |
 | ICML → ICLR | 8 → 9 pages | Can expand experiments, add LLM disclosure |
 | NeurIPS → ACL | 9 → 8 pages | Restructure for NLP conventions, add Limitations |
 | ICLR → AAAI | 9 → 7 pages | Significant cuts needed, strict style adherence |
 | Any → COLM | varies → 9 | Reframe for language model focus |
 **Step 2: Content Migration (NOT Template Merge)**
 **Never copy LaTeX preambles between templates.** Instead:
 ```bash
 # 1. Start fresh with target template
 cp -r templates/icml2026/ new_submission/
 # 2. Copy ONLY content sections from old paper
 # - Abstract text
 # - Section content (between \section{} commands)
 # - Figures and tables
 # - Bibliography entries
 # 3. Paste into target template structure
 ```
 **Step 3: Adjusting for Page Limits**
 When cutting pages (e.g., NeurIPS 9 → AAAI 7):
 - Move detailed proofs to appendix
 - Condense related work (cite surveys instead of individual papers)
 - Combine similar experiments into unified tables
 - Use smaller figure sizes with subfigures
 - Tighten writing: eliminate redundancy, use active voice
 When expanding (e.g., ICML 8 → ICLR 9):
 - Add ablation studies reviewers requested
 - Expand limitations discussion
 - Include additional baselines
 - Add qualitative examples
 **Step 4: Conference-Specific Adjustments**
 | Target Venue | Required Additions |
 |--------------|-------------------|
 | **ICML** | Broader Impact Statement (after conclusion) |
 | **ICLR** | LLM usage disclosure, reciprocal reviewing agreement |
 | **ACL/EMNLP** | Limitations section (mandatory), Ethics Statement |
 | **AAAI** | Strict adherence to style file (no modifications) |
 | **NeurIPS** | Paper checklist (appendix), lay summary if accepted |
 **Step 5: Update References**
 ```latex
 % Remove self-citations that reveal identity (for blind review)
 % Update any "under review" citations to published versions
 % Add new relevant work published since last submission
 ```
 **Step 6: Addressing Previous Reviews**
 When resubmitting after rejection:
 - **Do** address reviewer concerns in the new version
 - **Do** add experiments/clarifications reviewers requested
 - **Don't** include a "changes from previous submission" section (blind review)
 - **Don't** reference the previous submission or reviews
 **Common Conversion Pitfalls:**
 - ❌ Copying `\usepackage` commands (causes conflicts)
 - ❌ Keeping old conference header/footer commands
 - ❌ Forgetting to update `\bibliography{}` path
 - ❌ Missing conference-specific required sections
 - ❌ Exceeding page limit after format change
 ---
 ## Citation Workflow (Hallucination Prevention)
 **⚠️ CRITICAL**: AI-generated citations have a ~40% error rate. **Never write BibTeX from memory.**
 ### The Golden Rule
 ```
 IF you cannot programmatically fetch a citation:
    → Mark it as [CITATION NEEDED] or [PLACEHOLDER - VERIFY]
    → Tell the scientist explicitly
    → NEVER invent a plausible-sounding reference
 ```
 ### Workflow 2: Adding Citations
 ```
 Citation Verification (MANDATORY for every citation):
 - [ ] Step 1: Search using Exa MCP or Semantic Scholar API
 - [ ] Step 2: Verify paper exists in 2+ sources (Semantic Scholar + arXiv/CrossRef)
 - [ ] Step 3: Retrieve BibTeX via DOI (programmatically, not from memory)
 - [ ] Step 4: Verify the claim you're citing actually appears in the paper
 - [ ] Step 5: Add verified BibTeX to bibliography
 - [ ] Step 6: If ANY step fails → mark as placeholder, inform scientist
 ```
 **Step 0: Use Exa MCP for Initial Search (Recommended)**
 If Exa MCP is installed, use it to find relevant papers:
 ```
 Search: "RLHF language model alignment 2023"
 Search: "sparse autoencoders interpretability"
 Search: "attention mechanism transformers Vaswani"
 ```
 Then verify each result with Semantic Scholar and fetch BibTeX via DOI.
 **Step 1: Search Semantic Scholar**
 ```python
 from semanticscholar import SemanticScholar
 sch = SemanticScholar()
 results = sch.search_paper("attention mechanism transformers", limit=5)
 for paper in results:
    # externalIds may be None; guard before calling .get() on it
    external_ids = paper.externalIds or {}
    print(f"{paper.title} - {paper.paperId}")
    print(f"  DOI: {external_ids.get('DOI', 'N/A')}")
 ```
 **Step 2: Verify Existence**
 Confirm paper appears in at least two sources (Semantic Scholar + CrossRef/arXiv).
 **Step 3: Retrieve BibTeX via DOI**
 ```python
 import requests
 def doi_to_bibtex(doi: str, timeout: float = 10.0) -> str:
    """Fetch the BibTeX entry for a DOI via doi.org content negotiation.
    Args:
        doi: Bare DOI, e.g. "10.48550/arXiv.1706.03762" (no URL prefix).
        timeout: Seconds to wait for the server before giving up.
    Returns:
        The BibTeX text returned by the server.
    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    response = requests.get(
        f"https://doi.org/{doi}",
        headers={"Accept": "application/x-bibtex"},
        timeout=timeout,  # requests waits indefinitely unless a timeout is given
    )
    response.raise_for_status()
    return response.text
 # Example
 bibtex = doi_to_bibtex("10.48550/arXiv.1706.03762")
 print(bibtex)
 ```
 **Step 4: Verify Claims**
 Before citing for a specific claim, access the paper and confirm the attributed claim actually appears.
 **Step 5: Handle Failures Explicitly**
 If you cannot verify a citation at ANY step:
 ```latex
 % Option 1: Explicit placeholder
 \cite{PLACEHOLDER_smith2023_verify}  % TODO: Could not verify - scientist must confirm
 % Option 2: Note in text
 ... as shown in prior work [CITATION NEEDED - could not verify Smith et al. 2023].
 ```
 **Always inform the scientist:**
 > "I could not verify the following citations and have marked them as placeholders:
 > - Smith et al. 2023 on reward hacking - could not find in Semantic Scholar
 > - Jones 2022 on scaling laws - found similar paper but different authors
 > Please verify these before submission."
 ### Summary: Citation Rules
 | Situation | Action |
 |-----------|--------|
 | Found paper, got DOI, fetched BibTeX | ✅ Use the citation |
 | Found paper, no DOI | ✅ Use arXiv BibTeX or manual entry from paper |
 | Paper exists but can't fetch BibTeX | ⚠️ Mark placeholder, inform scientist |
 | Uncertain if paper exists | ❌ Mark `[CITATION NEEDED]`, inform scientist |
 | "I think there's a paper about X" | ❌ **NEVER cite** - search first or mark placeholder |
 **🚨 NEVER generate BibTeX from memory—always fetch programmatically. 🚨**
 See [references/citation-workflow.md](references/citation-workflow.md) for complete API documentation.
 ---
 ## Common Issues and Solutions
 **Issue: Abstract too generic**
 Delete first sentence if it could be prepended to any ML paper. Start with your specific contribution.
 **Issue: Introduction exceeds 1.5 pages**
 Split background into Related Work. Front-load contribution bullets. Methods should start by page 2-3.
 **Issue: Experiments lack explicit claims**
 Add sentence before each experiment: "This experiment tests whether [specific claim]..."
 **Issue: Reviewers find paper hard to follow**
 - Add explicit signposting: "In this section, we show X"
 - Use consistent terminology throughout
 - Include figure captions that stand alone
 **Issue: Missing statistical significance**
 Always include:
 - Error bars (specify: std dev or std error)
 - Number of runs
 - Statistical tests if comparing methods
 ---
 ## Reviewer Evaluation Criteria
 Reviewers assess papers on four dimensions:
 | Criterion | What Reviewers Look For |
 |-----------|------------------------|
 | **Quality** | Technical soundness, well-supported claims |
 | **Clarity** | Clear writing, reproducible by experts |
 | **Significance** | Community impact, advances understanding |
 | **Originality** | New insights (doesn't require new method) |
 **Scoring (NeurIPS 6-point scale):**
 - 6: Strong Accept - Groundbreaking, flawless
 - 5: Accept - Technically solid, high impact
 - 4: Borderline Accept - Solid, limited evaluation
 - 3: Borderline Reject - Solid but weaknesses outweigh
 - 2: Reject - Technical flaws
 - 1: Strong Reject - Known results or ethics issues
 See [references/reviewer-guidelines.md](references/reviewer-guidelines.md) for detailed reviewer instructions.
 ---
 ## Tables and Figures
 ### Tables
 Use `booktabs` LaTeX package for professional tables:
 ```latex
 \usepackage{booktabs}
 \begin{tabular}{lrr}
 \toprule
 Method & Accuracy $\uparrow$ & Latency $\downarrow$ \\
 \midrule
 Baseline & 85.2 & 45ms \\
 \textbf{Ours} & \textbf{92.1} & \textbf{38ms} \\
 \bottomrule
 \end{tabular}
 ```
 **Rules:**
 - Bold best value per metric
 - Include direction symbols (↑ higher is better, ↓ lower is better)
 - Right-align numerical columns
 - Consistent decimal precision
 ### Figures
 - **Vector graphics** (PDF, EPS) for all plots and diagrams
 - **Raster** (PNG 600 DPI) only for photographs
 - Use **colorblind-safe palettes** (Okabe-Ito or Paul Tol)
 - Verify **grayscale readability** (8% of men have color vision deficiency)
 - **No title inside figure**—the caption serves this function
 - **Self-contained captions**—reader should understand without main text
 ---
 ## References & Resources
 ### Reference Documents (Deep Dives)
 | Document | Contents |
 |----------|----------|
 | [writing-guide.md](references/writing-guide.md) | Gopen & Swan 7 principles, Ethan Perez micro-tips, word choice |
 | [citation-workflow.md](references/citation-workflow.md) | Citation APIs, Python code, BibTeX management |
 | [checklists.md](references/checklists.md) | NeurIPS 16-item, ICML, ICLR, ACL requirements |
 | [reviewer-guidelines.md](references/reviewer-guidelines.md) | Evaluation criteria, scoring, rebuttals |
 | [sources.md](references/sources.md) | Complete bibliography of all sources |
 ### LaTeX Templates
 Templates in `templates/` directory: **ICML 2026**, **ICLR 2026**, **NeurIPS 2025**, **ACL/EMNLP**, **AAAI 2026**, **COLM 2025**.
 **Compiling to PDF:**
 - **VS Code/Cursor**: Install LaTeX Workshop extension + TeX Live → Save to auto-compile
 - **Command line**: `latexmk -pdf main.tex` or `pdflatex` + `bibtex` workflow
 - **Online**: Upload to [Overleaf](https://overleaf.com)
 See [templates/README.md](templates/README.md) for detailed setup instructions.
 ### Key External Sources
 **Writing Philosophy:**
 - [Neel Nanda: How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) - Narrative, "What/Why/So What"
 - [Farquhar: How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) - 5-sentence abstract
 - [Gopen & Swan: Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) - 7 reader expectation principles
 - [Lipton: Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) - Word choice
 - [Perez: Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) - Micro-level clarity
 **APIs:** [Semantic Scholar](https://api.semanticscholar.org/api-docs/) | [CrossRef](https://www.crossref.org/documentation/retrieve-metadata/rest-api/) | [arXiv](https://info.arxiv.org/help/api/basics.html)
 **Venues:** [NeurIPS](https://neurips.cc/Conferences/2025/PaperInformation/StyleFiles) | [ICML](https://icml.cc/Conferences/2025/AuthorInstructions) | [ICLR](https://iclr.cc/Conferences/2026/AuthorGuide) | [ACL](https://github.com/acl-org/acl-style-files)
--- a/skills/mlops/ml-paper-writing/references/checklists.md
+++ b/skills/mlops/ml-paper-writing/references/checklists.md
@ -0,0 +1,361 @@
 # Conference Paper Checklists
 This reference documents the mandatory checklist requirements for major ML/AI conferences. All major venues now require paper checklists—missing them results in desk rejection.
 ---
 ## Contents
 - [NeurIPS Paper Checklist](#neurips-paper-checklist)
 - [ICML Paper Checklist](#icml-paper-checklist)
 - [ICLR Requirements](#iclr-requirements)
 - [ACL Requirements](#acl-requirements)
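 - [Universal Pre-Submission Checklist](#universal-pre-submission-checklist)
 - [Quick Reference: Page Limits](#quick-reference-page-limits)
 - [Template Locations](#template-locations)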
 ---
 ## NeurIPS Paper Checklist
 ### Mandatory Components
 All NeurIPS submissions must include a completed paper checklist. Papers lacking this element face **automatic desk rejection**. The checklist appears after references and supplemental material, outside the page limit.
 ### 16 Required Checklist Items
 #### 1. Claims Alignment
 Authors must verify that abstract and introduction claims match theoretical and experimental results, with clearly stated contributions, assumptions, and limitations.
 **What to check:**
 - [ ] Abstract claims match actual results
 - [ ] Introduction doesn't overclaim
 - [ ] Contributions are specific and falsifiable
 #### 2. Limitations Discussion
 Papers should include a dedicated "Limitations" section addressing strong assumptions, robustness to violations, scope constraints, and performance-influencing factors.
 **What to include:**
 - [ ] Dedicated Limitations section
 - [ ] Honest assessment of scope
 - [ ] Conditions where method may fail
 #### 3. Theory & Proofs
 Theoretical contributions require full assumption statements and complete proofs (main paper or appendix with proof sketches for intuition).
 **What to check:**
 - [ ] All assumptions stated formally
 - [ ] Complete proofs provided (main text or appendix)
 - [ ] Proof sketches for intuition in main text
 #### 4. Reproducibility
 Authors must describe steps ensuring results verification through code release, detailed instructions, model access, or checkpoints appropriate to their contribution type.
 **What to provide:**
 - [ ] Clear reproducibility statement
 - [ ] Code availability information
 - [ ] Model checkpoints if applicable
 #### 5. Data & Code Access
 Instructions for reproducing main experimental results should be provided (supplemental material or URLs), including exact commands and environment specifications.
 **What to include:**
 - [ ] Exact commands to run experiments
 - [ ] Environment specifications (requirements.txt, conda env)
 - [ ] Data access instructions
 #### 6. Experimental Details
 Papers must specify training details: data splits, hyperparameters, and selection methods in the main paper or supplementary materials.
 **What to document:**
 - [ ] Train/val/test split details
 - [ ] All hyperparameters used
 - [ ] Hyperparameter selection method
 #### 7. Statistical Significance
 Results require error bars, confidence intervals, or statistical tests with clearly stated calculation methods and underlying assumptions.
 **What to include:**
 - [ ] Error bars or confidence intervals
 - [ ] Number of runs/seeds
 - [ ] Calculation method (std dev vs std error)
 #### 8. Compute Resources
 Specifications needed: compute worker types (CPU/GPU), memory, storage, execution time per run, and total project compute requirements.
 **What to document:**
 - [ ] GPU type and count
 - [ ] Training time per run
 - [ ] Total compute used
 #### 9. Ethics Code Compliance
 Authors confirm adherence to the NeurIPS Code of Ethics, noting any necessary deviations.
 **What to verify:**
 - [ ] Read NeurIPS Code of Ethics
 - [ ] Confirm compliance
 - [ ] Note any deviations with justification
 #### 10. Broader Impacts
 Discussion of potential negative societal applications, fairness concerns, privacy risks, and possible mitigation strategies when applicable.
 **What to address:**
 - [ ] Potential negative applications
 - [ ] Fairness considerations
 - [ ] Privacy implications
 - [ ] Mitigation strategies
 #### 11. Safeguards
 High-risk models (language models, internet-scraped datasets) require controlled release mechanisms and usage guidelines.
 **What to consider:**
 - [ ] Release strategy for sensitive models
 - [ ] Usage guidelines if needed
 - [ ] Access controls if appropriate
 #### 12. License Respect
 All existing assets require creator citations, license names, URLs, version numbers, and terms-of-service acknowledgment.
 **What to document:**
 - [ ] Dataset licenses cited
 - [ ] Code licenses respected
 - [ ] Version numbers included
 #### 13. Asset Documentation
 New releases need structured templates documenting training details, limitations, consent procedures, and licensing information.
 **For new datasets/models:**
 - [ ] Datasheet or model card
 - [ ] Training data documentation
 - [ ] Known limitations
 #### 14. Human Subjects
 Crowdsourcing studies must include participant instructions, screenshots, compensation details, and comply with minimum wage requirements.
 **What to include:**
 - [ ] Task instructions
 - [ ] Compensation details
 - [ ] Time estimates
 #### 15. IRB Approvals
 Human subjects research requires documented institutional review board approval or equivalent, with risk descriptions disclosed (maintaining anonymity at submission).
 **What to verify:**
 - [ ] IRB approval obtained
 - [ ] Risk assessment completed
 - [ ] Anonymized at submission
 #### 16. LLM Declaration
 Usage of large language models as core methodology components requires disclosure; writing/editing use doesn't require declaration.
 **What to disclose:**
 - [ ] LLM used as core methodology component
 - [ ] How LLM was used
 - [ ] (Writing assistance doesn't require disclosure)
 ### Response Format
 Authors select "yes," "no," or "N/A" per question, with optional 1-2 sentence justifications.
 **Important:** Reviewers are explicitly instructed not to penalize honest limitation acknowledgment.
 ---
 ## ICML Paper Checklist
 ### Broader Impact Statement
 ICML requires a Broader Impact Statement at the end of the paper, before references. This does NOT count toward the page limit.
 **Required elements:**
 - Potential positive impacts
 - Potential negative impacts
 - Mitigation strategies
 - Who may be affected
 ### ICML Specific Requirements
 #### Reproducibility Checklist
 - [ ] Data splits clearly specified
 - [ ] Hyperparameters listed
 - [ ] Search ranges documented
 - [ ] Selection method explained
 - [ ] Compute resources specified
 - [ ] Code availability stated
 #### Statistical Reporting
 - [ ] Error bars on all figures
 - [ ] Standard deviation vs standard error specified
 - [ ] Number of runs stated
 - [ ] Significance tests if comparing methods
 #### Anonymization
 - [ ] No author names in paper
 - [ ] No acknowledgments
 - [ ] No grant numbers
 - [ ] Prior work cited in third person
 - [ ] No identifiable repository URLs
 ---
 ## ICLR Requirements
 ### LLM Disclosure Policy (New for 2026)
 ICLR has a specific LLM disclosure requirement:
 > "If LLMs played a significant role in research ideation and/or writing to the extent that they could be regarded as a contributor, authors must describe their precise role in a separate appendix section."
 **When disclosure is required:**
 - LLM used for significant research ideation
 - LLM used for substantial writing
 - LLM could be considered a contributor
 **When disclosure is NOT required:**
 - Grammar checking
 - Minor editing assistance
 - Code completion tools
 **Consequences of non-disclosure:**
 - Desk rejection
 - Potential post-publication issues
 ### ICLR Specific Requirements
 #### Reproducibility Statement (Optional but Recommended)
 Add a statement referencing:
 - Supporting materials
 - Code availability
 - Data availability
 - Model checkpoints
 #### Ethics Statement (Optional)
 Address potential concerns in ≤1 page. Does not count toward page limit.
 #### Reciprocal Reviewing
 - Authors on 3+ papers must serve as reviewers for ≥6 papers
 - Each submission needs ≥1 author registered to review ≥3 papers
 ---
 ## ACL Requirements
 ### Limitations Section (Mandatory)
 ACL specifically requires a Limitations section:
 **What to include:**
 - Strong assumptions made
 - Scope limitations
 - When method may fail
 - Generalization concerns
 **Important:** The Limitations section does NOT count toward the page limit.
 ### ACL Specific Checklist
 #### Responsible NLP
 - [ ] Bias considerations addressed
 - [ ] Fairness evaluated if applicable
 - [ ] Dual-use concerns discussed
 #### Multilingual Considerations
 If applicable:
 - [ ] Language diversity addressed
 - [ ] Non-English languages included
 - [ ] Translation quality verified
 #### Human Evaluation
 If applicable:
 - [ ] Annotator details provided
 - [ ] Agreement metrics reported
 - [ ] Compensation documented
 ---
 ## Universal Pre-Submission Checklist
 ### Before Every Submission
 #### Paper Content
 - [ ] Abstract ≤ word limit (usually 250-300 words)
 - [ ] Main content within page limit
 - [ ] References complete and verified
 - [ ] Limitations section included
 - [ ] All figures/tables have captions
 - [ ] Captions are self-contained
 #### Formatting
 - [ ] Correct template used (venue + year specific)
 - [ ] Margins not modified
 - [ ] Font sizes not modified
 - [ ] Double-blind requirements met
 - [ ] Page numbers (for review) or none (camera-ready)
 #### Technical
 - [ ] All claims supported by evidence
 - [ ] Error bars included
 - [ ] Baselines appropriate
 - [ ] Hyperparameters documented
 - [ ] Compute resources stated
 #### Reproducibility
 - [ ] Code will be available (or justification)
 - [ ] Data will be available (or justification)
 - [ ] Environment documented
 - [ ] Commands to reproduce provided
 #### Ethics
 - [ ] Broader impacts considered
 - [ ] Limitations honestly stated
 - [ ] Licenses respected
 - [ ] IRB obtained if needed
 #### Final Checks
 - [ ] PDF compiles without errors
 - [ ] All figures render correctly
 - [ ] All citations resolve
 - [ ] Supplementary material organized
 - [ ] Conference checklist completed
 ---
 ## Quick Reference: Page Limits
 | Conference | Main Content | References | Appendix |
 |------------|-------------|------------|----------|
 | NeurIPS 2025 | 9 pages | Unlimited | Unlimited (checklist separate) |
 | ICML 2026 | 8 pages (+1 camera) | Unlimited | Unlimited |
 | ICLR 2026 | 9 pages (+1 camera) | Unlimited | Unlimited |
 | ACL 2025 | 8 pages (long) | Unlimited | Unlimited |
 | AAAI 2026 | 7 pages (+1 camera) | Unlimited | Unlimited |
 | COLM 2025 | 9 pages (+1 camera) | Unlimited | Unlimited |
 ---
 ## Template Locations
 All conference templates are in the `templates/` directory:
 ```
 templates/
 ├── icml2026/       # ICML 2026 official
 ├── iclr2026/       # ICLR 2026 official
 ├── neurips2025/    # NeurIPS 2025
 ├── acl/            # ACL style files
 ├── aaai2026/       # AAAI 2026
 └── colm2025/       # COLM 2025
 ```
--- a/skills/mlops/ml-paper-writing/references/citation-workflow.md
+++ b/skills/mlops/ml-paper-writing/references/citation-workflow.md
@ -0,0 +1,562 @@
 # Citation Management & Hallucination Prevention
 This reference provides a complete workflow for managing citations programmatically, preventing AI-generated citation hallucinations, and maintaining clean bibliographies.
 ---
 ## Contents
 - [Why Citation Verification Matters](#why-citation-verification-matters)
 - [Citation APIs Overview](#citation-apis-overview)
 - [Verified Citation Workflow](#verified-citation-workflow)
 - [Python Implementation](#python-implementation)
 - [BibTeX Management](#bibtex-management)
 - [Common Citation Formats](#common-citation-formats)
 - [Troubleshooting](#troubleshooting)
 ---
 ## Why Citation Verification Matters
 ### The Hallucination Problem
 Research has documented significant issues with AI-generated citations:
 - **~40% error rate** in AI-generated citations (Enago Academy research)
 - NeurIPS 2025 found **100+ hallucinated citations** slipped through review
 - Common errors include:
  - Fabricated paper titles with real author names
  - Wrong publication venues or years
  - Non-existent papers with plausible metadata
  - Incorrect DOIs or arXiv IDs
 ### Consequences
 - Desk rejection at some venues
 - Loss of credibility with reviewers
 - Potential retraction if published
 - Wasted time chasing non-existent sources
 ### Solution
 **Never generate citations from memory—always verify programmatically.**
 ---
 ## Citation APIs Overview
 ### Primary APIs
 | API | Coverage | Rate Limits | Best For |
 |-----|----------|-------------|----------|
 | **Semantic Scholar** | 214M papers | 1 RPS (free key) | ML/AI papers, citation graphs |
 | **CrossRef** | 140M+ DOIs | Polite pool with mailto | DOI lookup, BibTeX retrieval |
 | **arXiv** | Preprints | 3-second delays | ML preprints, PDF access |
 | **OpenAlex** | 240M+ works | 100K/day, 10 RPS | Open alternative to MAG |
 ### API Selection Guide
 ```
 Need ML paper search? → Semantic Scholar
 Have DOI, need BibTeX? → CrossRef content negotiation
 Looking for preprint? → arXiv API
 Need open data, bulk access? → OpenAlex
 ```
 ### No Official Google Scholar API
 Google Scholar has no official API. Scraping violates ToS. Use SerpApi ($75-275/month) only if Semantic Scholar coverage is insufficient.
 ---
 ## Verified Citation Workflow
 ### 5-Step Process
 ```
 1. SEARCH → Query Semantic Scholar with specific keywords
     ↓
 2. VERIFY → Confirm paper exists in 2+ sources
     ↓
 3. RETRIEVE → Get BibTeX via DOI content negotiation
     ↓
 4. VALIDATE → Confirm the claim appears in source
     ↓
 5. ADD → Add verified entry to .bib file
 ```
 ### Step 1: Search
 Use Semantic Scholar for ML/AI papers:
 ```python
 from semanticscholar import SemanticScholar
 sch = SemanticScholar()
 results = sch.search_paper("transformer attention mechanism", limit=10)
 for paper in results:
    # externalIds may be None; guard before calling .get() on it
    external_ids = paper.externalIds or {}
    print(f"Title: {paper.title}")
    print(f"Year: {paper.year}")
    print(f"DOI: {external_ids.get('DOI', 'N/A')}")
    print(f"arXiv: {external_ids.get('ArXiv', 'N/A')}")
    print(f"Citation count: {paper.citationCount}")
    print("---")
 ```
 ### Step 2: Verify Existence
 Confirm paper exists in at least two sources:
 ```python
 import requests
 def verify_paper(doi=None, arxiv_id=None, title=None, timeout=10):
    """Verify paper exists in multiple sources.
    Args:
        doi: DOI to look up in Semantic Scholar and CrossRef.
        arxiv_id: arXiv identifier to look up via the arXiv API.
        title: Currently unused; kept so existing callers keep working.
        timeout: Seconds allowed for each requests.get call.
    Returns:
        (verified, sources_found): verified is True when at least two
        sources list the paper; sources_found names the sources that did.
    """
    # Local import keeps this snippet runnable on its own.
    from semanticscholar import SemanticScholar
    sources_found = []
    if doi:
        # Check Semantic Scholar
        if SemanticScholar().get_paper(f"DOI:{doi}"):
            sources_found.append("Semantic Scholar")
        # Check CrossRef (via DOI); HTTP 200 is treated as "found"
        resp = requests.get(
            f"https://api.crossref.org/works/{doi}",
            timeout=timeout,
        )
        if resp.status_code == 200:
            sources_found.append("CrossRef")
    # Check arXiv: the Atom feed contains an <entry> element for a match
    if arxiv_id:
        resp = requests.get(
            f"http://export.arxiv.org/api/query?id_list={arxiv_id}",
            timeout=timeout,
        )
        if "<entry>" in resp.text:
            sources_found.append("arXiv")
    return len(sources_found) >= 2, sources_found
 ```
 ### Step 3: Retrieve BibTeX
 Use DOI content negotiation for guaranteed accuracy:
 ```python
 import requests
 def doi_to_bibtex(doi: str, timeout: float = 10.0) -> str:
    """Fetch the BibTeX entry for a DOI via doi.org content negotiation.
    Args:
        doi: Bare DOI, e.g. "10.48550/arXiv.1706.03762" (no URL prefix).
        timeout: Seconds to wait for the server before giving up.
    Returns:
        The BibTeX text returned by the server.
    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    response = requests.get(
        f"https://doi.org/{doi}",
        headers={"Accept": "application/x-bibtex"},
        allow_redirects=True,
        timeout=timeout,  # requests waits indefinitely unless a timeout is given
    )
    response.raise_for_status()
    return response.text
 # Example: "Attention Is All You Need"
 bibtex = doi_to_bibtex("10.48550/arXiv.1706.03762")
 print(bibtex)
 ```
 ### Step 4: Validate Claims
 Before citing a paper for a specific claim, verify the claim exists:
 ```python
 def get_paper_abstract(doi):
    """Return the abstract Semantic Scholar has for `doi`, or None if the lookup returns nothing."""
    sch = SemanticScholar()
    paper = sch.get_paper(f"DOI:{doi}")
    return paper.abstract if paper else None
 # Verify claim appears in abstract
 abstract = get_paper_abstract("10.48550/arXiv.1706.03762")
 claim = "attention mechanism"
 # abstract is None when the lookup returns nothing (and may be None when no
 # abstract is on record), so guard before calling .lower() on it
 if abstract and claim.lower() in abstract.lower():
    print("Claim appears in paper")
 else:
    print("Claim NOT confirmed from abstract - read the paper before citing")
 ```
 ### Step 5: Add to Bibliography
 Add verified entry to your .bib file with consistent key format:
 ```python
 def generate_citation_key(bibtex: str) -> str:
    """Generate consistent citation key: author_year_firstword.
    The first author's surname is used whether names are written
    "Last, First" or "First Last"; authors are separated by " and ".
    Missing fields fall back to "unknown", "0000" and "paper" respectively.
    Args:
        bibtex: Text of a single BibTeX entry.
    Returns:
        Lower-case key such as "vaswani_2017_attention".
    """
    import re
    # \b keeps a field name from matching inside a longer one
    # (e.g. "title" inside "booktitle").
    author_match = re.search(r'\bauthor\s*=\s*\{([^}]+)\}', bibtex, re.I)
    first_author = "unknown"
    if author_match:
        # Isolate the first author before looking for the surname; otherwise
        # "A. One and B. Two" would yield the last author's surname.
        lead = re.split(r'\s+and\s+', author_match.group(1).strip(), maxsplit=1)[0]
        # "Last, First" -> part before the comma; "First Last" -> whole name.
        name_tokens = lead.split(',')[0].split()
        if name_tokens:
            first_author = name_tokens[-1]
    # Accept year = {2017}, year = "2017" and bare year = 2017.
    year_match = re.search(r'\byear\s*=\s*["{]?(\d{4})', bibtex, re.I)
    year = year_match.group(1) if year_match else "0000"
    title_match = re.search(r'\btitle\s*=\s*\{([^}]+)\}', bibtex, re.I)
    first_word = "paper"
    if title_match:
        # Use the first title word that still has letters once punctuation,
        # digits and braces are stripped.
        for word in title_match.group(1).split():
            cleaned = re.sub(r'[^a-z]', '', word.lower())
            if cleaned:
                first_word = cleaned
                break
    return f"{first_author.lower()}_{year}_{first_word}"
 ```
 ---
 ## Python Implementation
 ### Complete Citation Manager Class
 ```python
 """
 Citation Manager - Verified citation workflow for ML papers.
 """
 import requests
 import time
 from typing import Optional, List, Dict, Tuple
 from dataclasses import dataclass
 try:
    from semanticscholar import SemanticScholar
 except ImportError:
    print("Install: pip install semanticscholar")
    SemanticScholar = None
@dataclass
 class Paper:
    """Metadata for a single paper, as filled in by CitationManager.search."""
    title: str
    authors: List[str]  # author names, in the order the source returned them
    year: int  # search() stores 0 when the source has no year
    doi: Optional[str]  # taken from the result's external IDs ('DOI'), if any
    arxiv_id: Optional[str]  # taken from the result's external IDs ('ArXiv'), if any
    venue: Optional[str]
    citation_count: int  # search() stores 0 when the source has no count
    abstract: Optional[str]
 class CitationManager:
    """Search for papers, cross-check them, and produce BibTeX entries.
    Search goes through the Semantic Scholar client; verification and BibTeX
    retrieval use HTTP requests to CrossRef, arXiv and doi.org.
    Attributes:
        sch: Semantic Scholar client, or None when the package is unavailable.
        verified_papers: Papers that passed ``verify`` inside ``cite``, keyed
            by title.
    """
    def __init__(self, api_key: Optional[str] = None):
        """Create the manager.
        Args:
            api_key: Optional API key forwarded to the Semantic Scholar client.
        """
        self.sch = SemanticScholar(api_key=api_key) if SemanticScholar else None
        self.verified_papers: Dict[str, Paper] = {}
    def search(self, query: str, limit: int = 10) -> List[Paper]:
        """Search for papers using Semantic Scholar.
        Args:
            query: Free-text search query.
            limit: Maximum number of papers to return.
        Returns:
            At most ``limit`` Paper records, in the order the API returned them.
        Raises:
            RuntimeError: If the Semantic Scholar client is not available.
        """
        if not self.sch:
            raise RuntimeError("Semantic Scholar not available")
        results = self.sch.search_paper(query, limit=limit)
        papers = []
        for index, r in enumerate(results):
            # The result object may be lazily paginated; stop at `limit` so
            # iterating does not keep pulling further pages from the API.
            if index >= limit:
                break
            external_ids = r.externalIds or {}
            papers.append(Paper(
                title=r.title,
                authors=[a.name for a in (r.authors or [])],
                year=r.year or 0,
                doi=external_ids.get('DOI'),
                arxiv_id=external_ids.get('ArXiv'),
                venue=r.venue,
                citation_count=r.citationCount or 0,
                abstract=r.abstract
            ))
        return papers
    def verify(self, paper: Paper) -> Tuple[bool, List[str]]:
        """Verify paper exists in multiple sources.
        Args:
            paper: A paper obtained from ``search``.
        Returns:
            ``(verified, sources)`` where ``sources`` lists every source that
            confirmed the paper and ``verified`` is True with two or more.
        """
        # Already found in Semantic Scholar via search
        sources = ["Semantic Scholar"]
        # Check CrossRef if DOI available
        if paper.doi:
            try:
                resp = requests.get(
                    f"https://api.crossref.org/works/{paper.doi}",
                    timeout=10
                )
                if resp.status_code == 200:
                    sources.append("CrossRef")
            except requests.RequestException:
                # Best effort: an unreachable source simply does not count.
                # Only network errors are ignored, so Ctrl-C and real bugs
                # still surface.
                pass
        # Check arXiv if ID available
        if paper.arxiv_id:
            try:
                resp = requests.get(
                    f"https://export.arxiv.org/api/query?id_list={paper.arxiv_id}",
                    timeout=10
                )
                # Require a successful response: an error feed can also
                # contain <entry>/<title> elements.
                if (resp.status_code == 200
                        and "<entry>" in resp.text and "<title>" in resp.text):
                    sources.append("arXiv")
            except requests.RequestException:
                pass
        return len(sources) >= 2, sources
    def get_bibtex(self, paper: Paper) -> Optional[str]:
        """Get BibTeX for a paper.
        Tries DOI content negotiation first and falls back to an entry
        generated from the paper's own metadata.
        """
        if paper.doi:
            try:
                resp = requests.get(
                    f"https://doi.org/{paper.doi}",
                    headers={"Accept": "application/x-bibtex"},
                    timeout=10,
                    allow_redirects=True
                )
                if resp.status_code == 200:
                    return resp.text
            except requests.RequestException:
                pass
        # Fallback: generate from paper data
        return self._generate_bibtex(paper)
    def _generate_bibtex(self, paper: Paper) -> str:
        """Generate BibTeX from paper metadata.
        The key follows author_year_firstword; a missing author or title
        falls back to "unknown" / "paper" instead of raising. Optional
        fields are emitted only when present, so the entry has no blank
        lines and no trailing comma.
        """
        names = [name for name in (paper.authors or []) if name]
        # Generate citation key
        author_tokens = names[0].split() if names else []
        first_author = author_tokens[-1] if author_tokens else "unknown"
        title = paper.title or ""
        title_words = title.split()
        # Keep only a-z so punctuation or LaTeX markup never leaks into the key
        first_word = "".join(
            ch for ch in (title_words[0].lower() if title_words else "")
            if "a" <= ch <= "z"
        ) or "paper"
        key = f"{first_author.lower()}_{paper.year}_{first_word}"
        # Format authors
        authors = " and ".join(names) if names else "Unknown"
        fields = [
            f"title = {{{title}}}",
            f"author = {{{authors}}}",
            f"year = {{{paper.year}}}",
        ]
        if paper.doi:
            fields.append(f"doi = {{{paper.doi}}}")
        if paper.arxiv_id:
            fields.append(f"eprint = {{{paper.arxiv_id}}}")
            fields.append("archiveprefix = {arXiv}")
        if paper.venue:
            fields.append(f"journal = {{{paper.venue}}}")
        body = ",\n".join(f"  {entry}" for entry in fields)
        return f"@article{{{key},\n{body}\n}}"
    def cite(self, query: str) -> Optional[str]:
        """Full workflow: search, verify, return BibTeX.
        Returns:
            BibTeX for the top search hit, or None when the search finds
            nothing. A warning is printed when the hit could not be verified
            in at least two sources; such hits are still returned but are not
            added to ``verified_papers``.
        """
        # Search
        papers = self.search(query, limit=5)
        if not papers:
            return None
        # Take top result
        paper = papers[0]
        # Verify
        verified, sources = self.verify(paper)
        if not verified:
            print(f"Warning: Could only verify in {sources}")
        # Get BibTeX
        bibtex = self.get_bibtex(paper)
        # Cache only papers that actually passed verification
        if bibtex and verified:
            self.verified_papers[paper.title] = paper
        return bibtex
 # Usage example: look up one paper and print its BibTeX entry
 if __name__ == "__main__":
    manager = CitationManager()
    # cite() searches, verifies, and returns BibTeX (or None if nothing is found)
    entry = manager.cite("attention is all you need transformer")
    if entry:
        print(entry)
 ```
 ### Quick Functions
 ```python
 def quick_cite(query: str) -> str:
    """Return BibTeX for the top hit of `query` using a throwaway CitationManager.
    The result is whatever CitationManager.cite returns, which is None when
    the search finds nothing.
    """
    return CitationManager().cite(query)
 def batch_cite(queries: List[str], output_file: str = "references.bib"):
    """Cite multiple papers and save to file.
    Args:
        queries: Search queries, one per paper.
        output_file: Path of the .bib file to write (overwritten).
    """
    cm = CitationManager()
    bibtex_entries = []
    for index, query in enumerate(queries):
        if index:
            # Rate limiting: pause between requests, not after the last one
            time.sleep(1)
        print(f"Processing: {query}")
        bibtex = cm.cite(query)
        if bibtex:
            bibtex_entries.append(bibtex)
    # Entries can contain non-ASCII author names; write UTF-8 explicitly
    # instead of relying on the platform's locale encoding.
    with open(output_file, 'w', encoding="utf-8") as f:
        f.write("\n\n".join(bibtex_entries))
    print(f"Saved {len(bibtex_entries)} citations to {output_file}")
 ```
 ---
 ## BibTeX Management
 ### BibTeX vs BibLaTeX
 | Feature | BibTeX | BibLaTeX |
 |---------|--------|----------|
 | Unicode support | Limited | Full |
 | Entry types | Standard | Extended (@online, @dataset) |
 | Customization | Limited | Highly flexible |
 | Backend | bibtex | Biber (recommended) |
 **Recommendation**: Use BibLaTeX with Biber for new papers.
 ### LaTeX Setup
 ```latex
 % In preamble
 \usepackage[
    backend=biber,
    style=numeric,
    sorting=none
 ]{biblatex}
 \addbibresource{references.bib}
 % In document
 \cite{vaswani_2017_attention}
 % At end
 \printbibliography
 ```
 ### Citation Commands
 ```latex
 \cite{key}      % Numeric: [1]
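 \citep{key}     % Parenthetical: (Author, 2020) -- natbib command; with biblatex use \parencite{key} or load biblatex with natbib=true
 \citet{key}     % Textual: Author (2020) -- natbib command; with biblatex use \textcite{key} or load biblatex with natbib=true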
 \citeauthor{key} % Just author name
 \citeyear{key}  % Just year
 ```
 ### Consistent Citation Keys
 Use format: `author_year_firstword`
 ```
 vaswani_2017_attention
 devlin_2019_bert
 brown_2020_language
 ```
 ---
 ## Common Citation Formats
 ### Conference Paper
 ```bibtex
@inproceedings{vaswani_2017_attention,
  title = {Attention Is All You Need},
  author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
            Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and
            Kaiser, Lukasz and Polosukhin, Illia},
  booktitle = {Advances in Neural Information Processing Systems},
  volume = {30},
  year = {2017},
  publisher = {Curran Associates, Inc.}
 }
 ```
 ### Journal Article
 ```bibtex
@article{hochreiter_1997_long,
  title = {Long Short-Term Memory},
  author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  journal = {Neural Computation},
  volume = {9},
  number = {8},
  pages = {1735--1780},
  year = {1997},
  publisher = {MIT Press}
 }
 ```
 ### arXiv Preprint
 ```bibtex
@misc{brown_2020_language,
  title = {Language Models are Few-Shot Learners},
  author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and others},
  year = {2020},
  eprint = {2005.14165},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL}
 }
 ```
 ---
 ## Troubleshooting
 ### Common Issues
 **Issue: Semantic Scholar returns no results**
 - Try more specific keywords
 - Check spelling of author names
 - Use quotation marks for exact phrases
 **Issue: DOI doesn't resolve to BibTeX**
 - The DOI may be registered with an agency other than CrossRef (e.g., arXiv DOIs are issued through DataCite), so CrossRef lookups fail
 - Try arXiv ID instead if available
 - Generate BibTeX from metadata manually
 **Issue: Rate limiting errors**
 - Add delays between requests (1-3 seconds)
 - Use API key if available
 - Cache results to avoid repeat queries
 **Issue: Encoding problems in BibTeX**
 - Use proper LaTeX escaping: `{\"u}` for ü
 - Ensure file is UTF-8 encoded
 - Use BibLaTeX with Biber for better Unicode
 ### Verification Checklist
 Before adding a citation:
 - [ ] Paper found in at least 2 sources
 - [ ] DOI or arXiv ID verified
 - [ ] BibTeX retrieved (not generated from memory)
 - [ ] Entry type correct (@inproceedings vs @article)
 - [ ] Author names complete and correctly formatted
 - [ ] Year and venue verified
 - [ ] Citation key follows consistent format
 ---
 ## Additional Resources
 **APIs:**
 - Semantic Scholar: https://api.semanticscholar.org/api-docs/
 - CrossRef: https://www.crossref.org/documentation/retrieve-metadata/rest-api/
 - arXiv: https://info.arxiv.org/help/api/basics.html
 - OpenAlex: https://docs.openalex.org/
 **Python Libraries:**
 - `semanticscholar`: https://pypi.org/project/semanticscholar/
 - `arxiv`: https://pypi.org/project/arxiv/
 - `habanero` (CrossRef): https://github.com/sckott/habanero
 **Verification Tools:**
 - Citely: https://citely.ai/citation-checker
 - ReciteWorks: https://reciteworks.com/
--- a/skills/mlops/ml-paper-writing/references/reviewer-guidelines.md
+++ b/skills/mlops/ml-paper-writing/references/reviewer-guidelines.md
@ -0,0 +1,367 @@
 # Reviewer Guidelines & Evaluation Criteria
 This reference documents how reviewers evaluate papers at major ML/AI conferences, helping authors anticipate and address reviewer concerns.
 ---
 ## Contents
 - [Universal Evaluation Dimensions](#universal-evaluation-dimensions)
 - [NeurIPS Reviewer Guidelines](#neurips-reviewer-guidelines)
 - [ICML Reviewer Guidelines](#icml-reviewer-guidelines)
 - [ICLR Reviewer Guidelines](#iclr-reviewer-guidelines)
 - [ACL Reviewer Guidelines](#acl-reviewer-guidelines)
 - [What Makes Reviews Strong](#what-makes-reviews-strong)
 - [Common Reviewer Concerns](#common-reviewer-concerns)
 - [How to Address Reviewer Feedback](#how-to-address-reviewer-feedback)
 ---
 ## Universal Evaluation Dimensions
 All major ML conferences assess papers across four core dimensions:
 ### 1. Quality (Technical Soundness)
 **What reviewers ask:**
 - Are claims well-supported by theoretical analysis or experimental results?
 - Are the proofs correct? Are the experiments properly controlled?
 - Are baselines appropriate and fairly compared?
 - Is the methodology sound?
 **How to ensure high quality:**
 - Include complete proofs (in the main paper, or in the appendix with proof sketches in the main text)
 - Use appropriate baselines (not strawmen)
 - Report variance/error bars with methodology
 - Document hyperparameter selection process
 ### 2. Clarity (Writing & Organization)
 **What reviewers ask:**
 - Is the paper clearly written and well organized?
 - Can an expert in the field reproduce the results?
 - Is notation consistent? Are terms defined?
 - Is the paper self-contained?
 **How to ensure clarity:**
 - Use consistent terminology throughout
 - Define all notation at first use
 - Include reproducibility details (appendix acceptable)
 - Have non-authors read before submission
 ### 3. Significance (Impact & Importance)
 **What reviewers ask:**
 - Are the results impactful for the community?
 - Will others build upon this work?
 - Does it address an important problem?
 - What is the potential for real-world impact?
 **How to demonstrate significance:**
 - Clearly articulate the problem's importance
 - Connect to broader research themes
 - Discuss potential applications
 - Compare to existing approaches meaningfully
 ### 4. Originality (Novelty & Contribution)
 **What reviewers ask:**
 - Does this provide new insights?
 - How does it differ from prior work?
 - Is the contribution non-trivial?
 **Key insight from NeurIPS guidelines:**
 > "Originality does not necessarily require introducing an entirely new method. Papers that provide novel insights from evaluating existing approaches or shed light on why methods succeed can also be highly original."
 ---
 ## NeurIPS Reviewer Guidelines
 ### Scoring System (1-6 Scale)
 | Score | Label | Description |
 |-------|-------|-------------|
 | **6** | Strong Accept | Groundbreaking, flawless work; top 2-3% of submissions |
 | **5** | Accept | Technically solid, high impact; would benefit the community |
 | **4** | Borderline Accept | Solid work with limited evaluation; leans accept |
 | **3** | Borderline Reject | Solid but weaknesses outweigh strengths; leans reject |
 | **2** | Reject | Technical flaws or weak evaluation |
 | **1** | Strong Reject | Well-known results or unaddressed ethics concerns |
 ### Reviewer Instructions
 Reviewers are explicitly instructed to:
 1. **Evaluate the paper as written** - not what it could be with revisions
 2. **Provide constructive feedback** - 3-5 actionable points
 3. **Not penalize honest limitations** - acknowledging weaknesses is encouraged
 4. **Assess reproducibility** - can the work be verified?
 5. **Consider ethical implications** - potential misuse or harm
 ### What Reviewers Should Avoid
 - Superficial, uninformed reviews
 - Demanding unreasonable additional experiments
 - Penalizing authors for honest limitation acknowledgment
 - Rejecting for missing citations to reviewer's own work
 ### Timeline (NeurIPS 2025)
 - Bidding: May 17-21
 - Reviewing period: May 29 - July 2
 - Author rebuttals: July 24-30
 - Discussion period: July 31 - August 13
 - Final notifications: September 18
 ---
 ## ICML Reviewer Guidelines
 ### Review Structure
 ICML reviewers provide:
 1. **Summary** - Brief description of contributions
 2. **Strengths** - Positive aspects
 3. **Weaknesses** - Areas for improvement
 4. **Questions** - Clarifications for authors
 5. **Limitations** - Assessment of stated limitations
 6. **Ethics** - Any concerns
 7. **Overall Score** - Recommendation
 ### Scoring Guidelines
 ICML uses a similar 1-6 scale with calibration:
 - Top 25% of accepted papers: Score 5-6
 - Typical accepted paper: Score 4-5
 - Borderline: Score 3-4
 - Clear reject: Score 1-2
 ### Key Evaluation Points
 1. **Reproducibility** - Are there enough details?
 2. **Experimental rigor** - Multiple seeds, proper baselines?
 3. **Writing quality** - Clear, organized, well-structured?
 4. **Novelty** - Non-trivial contribution?
 ---
 ## ICLR Reviewer Guidelines
 ### OpenReview Process
 ICLR uses OpenReview with:
 - Public reviews (after acceptance decisions)
 - Author responses visible to reviewers
 - Discussion between reviewers and ACs
 ### Scoring
 ICLR reviews include:
 - **Soundness**: 1-4 scale
 - **Presentation**: 1-4 scale
 - **Contribution**: 1-4 scale
 - **Overall**: 1-10 scale
 - **Confidence**: 1-5 scale
 ### Unique ICLR Considerations
 1. **LLM Disclosure** - Reviewers assess whether LLM use is properly disclosed
 2. **Reproducibility** - Emphasis on code availability
 3. **Reciprocal Reviewing** - Authors must also serve as reviewers
 ---
 ## ACL Reviewer Guidelines
 ### ACL-Specific Criteria
 ACL adds NLP-specific evaluation:
 1. **Linguistic soundness** - Are linguistic claims accurate?
 2. **Resource documentation** - Are datasets/models properly documented?
 3. **Multilingual consideration** - If applicable, is language diversity addressed?
 ### Limitations Section
 ACL specifically requires a Limitations section. Reviewers check:
 - Are limitations honest and comprehensive?
 - Do limitations undermine core claims?
 - Are potential negative impacts addressed?
 ### Ethics Review
 ACL has a dedicated ethics review process for:
 - Dual-use concerns
 - Data privacy issues
 - Bias and fairness implications
 ---
 ## What Makes Reviews Strong
 ### Following Daniel Dennett's Rules
 Good reviewers follow these principles:
 1. **Re-express the position fairly** - Show you understand the paper
 2. **List agreements** - Acknowledge what works well
 3. **List what you learned** - Credit the contribution
 4. **Only then critique** - After establishing understanding
 ### Review Structure Best Practices
 **Strong Review Structure:**
 ```
 Summary (1 paragraph):
 - What the paper does
 - Main contribution claimed
 Strengths (3-5 bullets):
 - Specific positive aspects
 - Why these matter
 Weaknesses (3-5 bullets):
 - Specific concerns
 - Why these matter
 - Suggestions for addressing
 Questions (2-4 items):
 - Clarifications needed
 - Things that would change assessment
 Minor Issues (optional):
 - Typos, unclear sentences
 - Formatting issues
 Overall Assessment:
 - Clear recommendation with reasoning
 ```
 ---
 ## Common Reviewer Concerns
 ### Technical Concerns
 | Concern | How to Pre-empt |
 |---------|-----------------|
 | "Baselines too weak" | Use state-of-the-art baselines, cite recent work |
 | "Missing ablations" | Include systematic ablation study |
 | "No error bars" | Report std dev/error, multiple runs |
 | "Hyperparameters not tuned" | Document tuning process, search ranges |
 | "Claims not supported" | Ensure every claim has evidence |
 ### Novelty Concerns
 | Concern | How to Pre-empt |
 |---------|-----------------|
 | "Incremental contribution" | Clearly articulate what's new vs prior work |
 | "Similar to [paper X]" | Explicitly compare to X in Related Work |
 | "Straightforward extension" | Highlight non-obvious aspects |
 ### Clarity Concerns
 | Concern | How to Pre-empt |
 |---------|-----------------|
 | "Hard to follow" | Use clear structure, signposting |
 | "Notation inconsistent" | Review all notation, create notation table |
 | "Missing details" | Include reproducibility appendix |
 | "Figures unclear" | Self-contained captions, proper sizing |
 ### Significance Concerns
 | Concern | How to Pre-empt |
 |---------|-----------------|
 | "Limited impact" | Discuss broader implications |
 | "Narrow evaluation" | Evaluate on multiple benchmarks |
 | "Only works in restricted setting" | Acknowledge scope, explain why still valuable |
 ---
 ## How to Address Reviewer Feedback
 ### Rebuttal Best Practices
 **Do:**
 - Thank reviewers for their time
 - Address each concern specifically
 - Provide evidence (new experiments if possible)
 - Be concise—reviewers are busy
 - Acknowledge valid criticisms
 **Don't:**
 - Be defensive or dismissive
 - Make promises you can't keep
 - Ignore difficult criticisms
 - Write excessively long rebuttals
 - Argue about subjective assessments
 ### Rebuttal Template
 ```markdown
 We thank the reviewers for their thoughtful feedback.
 ## Reviewer 1
 **R1-Q1: [Quoted concern]**
 [Direct response with evidence]
 **R1-Q2: [Quoted concern]**
 [Direct response with evidence]
 ## Reviewer 2
 ...
 ## Summary of Changes
 If accepted, we will:
 1. [Specific change]
 2. [Specific change]
 3. [Specific change]
 ```
 ### When to Accept Criticism
 Some reviewer feedback should simply be accepted:
 - Valid technical errors
 - Missing important related work
 - Unclear explanations
 - Missing experimental details
 Acknowledge these gracefully: "The reviewer is correct that... We will revise to..."
 ### When to Push Back
 You can respectfully disagree when:
 - Reviewer misunderstood the paper
 - Requested experiments are out of scope
 - Criticism is factually incorrect
 Frame disagreements constructively: "We appreciate this perspective. However, [explanation]..."
 ---
 ## Pre-Submission Reviewer Simulation
 Before submitting, ask yourself:
 **Quality:**
 - [ ] Would I trust these results if I saw them?
 - [ ] Are all claims supported by evidence?
 - [ ] Are baselines fair and recent?
 **Clarity:**
 - [ ] Can someone reproduce this from the paper?
 - [ ] Is the writing clear to non-experts in this subfield?
 - [ ] Are all terms and notation defined?
 **Significance:**
 - [ ] Why should the community care about this?
 - [ ] What can people do with this work?
 - [ ] Is the problem important?
 **Originality:**
 - [ ] What specifically is new here?
 - [ ] How does this differ from closest related work?
 - [ ] Is the contribution non-trivial?
--- a/skills/mlops/ml-paper-writing/references/sources.md
+++ b/skills/mlops/ml-paper-writing/references/sources.md
@ -0,0 +1,159 @@
 # Source Bibliography
 This document lists all authoritative sources used to build this skill, organized by topic.
 ---
 ## Writing Philosophy & Guides
 ### Primary Sources (Must-Read)
 | Source | Author | URL | Key Contribution |
 |--------|--------|-----|------------------|
 | **Highly Opinionated Advice on How to Write ML Papers** | Neel Nanda | [Alignment Forum](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) | Narrative framework, "What/Why/So What", time allocation |
 | **How to Write ML Papers** | Sebastian Farquhar (DeepMind) | [Blog](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) | 5-sentence abstract formula, structure templates |
 | **A Survival Guide to a PhD** | Andrej Karpathy | [Blog](http://karpathy.github.io/2016/09/07/phd/) | Paper structure recipe, contribution framing |
 | **Heuristics for Scientific Writing** | Zachary Lipton (CMU) | [Blog](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) | Word choice, section balance, intensifier warnings |
 | **Advice for Authors** | Jacob Steinhardt (UC Berkeley) | [Blog](https://jsteinhardt.stat.berkeley.edu/blog/advice-for-authors) | Precision over brevity, consistent terminology |
 | **Easy Paper Writing Tips** | Ethan Perez (Anthropic) | [Blog](https://ethanperez.net/easy-paper-writing-tips/) | Micro-level tips, apostrophe unfolding, clarity tricks |
 ### Foundational Scientific Writing
 | Source | Author | URL | Key Contribution |
 |--------|--------|-----|------------------|
 | **The Science of Scientific Writing** | Gopen & Swan | [PDF](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) | Topic/stress positions, old-before-new, 7 principles |
 | **Summary of Science of Scientific Writing** | Lawrence Crowl | [Summary](https://www.crowl.org/Lawrence/writing/GopenSwan90.html) | Condensed version of Gopen & Swan |
 ### Additional Resources
 | Source | URL | Key Contribution |
 |--------|-----|------------------|
 | How To Write A Research Paper In ML | [Blog](https://grigorisg9gr.github.io/machine%20learning/research%20paper/how-to-write-a-research-paper-in-machine-learning/) | Practical walkthrough, LaTeX tips |
 | A Recipe for Training Neural Networks | [Karpathy Blog](http://karpathy.github.io/2019/04/25/recipe/) | Debugging methodology that translates to paper structure |
 | ICML Paper Writing Best Practices | [ICML](https://icml.cc/Conferences/2022/BestPractices) | Official venue guidance |
 | Bill Freeman's Writing Slides | [MIT](https://billf.mit.edu/sites/default/files/documents/cvprPapers.pdf) | Visual guide to paper structure |
 ---
 ## Official Conference Guidelines
 ### NeurIPS
 | Document | URL | Purpose |
 |----------|-----|---------|
 | Paper Checklist Guidelines | [NeurIPS](https://neurips.cc/public/guides/PaperChecklist) | 16-item mandatory checklist |
 | Reviewer Guidelines 2025 | [NeurIPS](https://neurips.cc/Conferences/2025/ReviewerGuidelines) | Evaluation criteria, scoring |
 | Style Files | [NeurIPS](https://neurips.cc/Conferences/2025/PaperInformation/StyleFiles) | LaTeX templates |
 ### ICML
 | Document | URL | Purpose |
 |----------|-----|---------|
 | Paper Guidelines | [ICML](https://icml.cc/Conferences/2024/PaperGuidelines) | Submission requirements |
 | Reviewer Instructions 2025 | [ICML](https://icml.cc/Conferences/2025/ReviewerInstructions) | Review form, evaluation |
 | Style & Author Instructions | [ICML](https://icml.cc/Conferences/2022/StyleAuthorInstructions) | Formatting specifications |
 ### ICLR
 | Document | URL | Purpose |
 |----------|-----|---------|
 | Author Guide 2026 | [ICLR](https://iclr.cc/Conferences/2026/AuthorGuide) | Submission requirements, LLM disclosure |
 | Reviewer Guide 2025 | [ICLR](https://iclr.cc/Conferences/2025/ReviewerGuide) | Review process, evaluation |
 ### ACL/EMNLP
 | Document | URL | Purpose |
 |----------|-----|---------|
 | ACL Style Files | [GitHub](https://github.com/acl-org/acl-style-files) | LaTeX templates |
 | ACL Rolling Review | [ARR](https://aclrollingreview.org/) | Submission process |
 ### AAAI
 | Document | URL | Purpose |
 |----------|-----|---------|
 | Author Kit 2026 | [AAAI](https://aaai.org/authorkit26/) | Templates and guidelines |
 ### COLM
 | Document | URL | Purpose |
 |----------|-----|---------|
 | Template | [GitHub](https://github.com/COLM-org/Template) | LaTeX templates |
 ---
 ## Citation APIs & Tools
 ### APIs
 | API | Documentation | Best For |
 |-----|---------------|----------|
 | **Semantic Scholar** | [Docs](https://api.semanticscholar.org/api-docs/) | ML/AI papers, citation graphs |
 | **CrossRef** | [Docs](https://www.crossref.org/documentation/retrieve-metadata/rest-api/) | DOI lookup, BibTeX retrieval |
 | **arXiv** | [Docs](https://info.arxiv.org/help/api/basics.html) | Preprints, PDF access |
 | **OpenAlex** | [Docs](https://docs.openalex.org/) | Open alternative, bulk access |
 ### Python Libraries
 | Library | Install | Purpose |
 |---------|---------|---------|
 | `semanticscholar` | `pip install semanticscholar` | Semantic Scholar wrapper |
 | `arxiv` | `pip install arxiv` | arXiv search and download |
 | `habanero` | `pip install habanero` | CrossRef client |
 ### Citation Verification
 | Tool | URL | Purpose |
 |------|-----|---------|
 | Citely | [citely.ai](https://citely.ai/citation-checker) | Batch verification |
 | ReciteWorks | [reciteworks.com](https://reciteworks.com/) | In-text citation checking |
 ---
 ## Visualization & Formatting
 ### Figure Creation
 | Tool | URL | Purpose |
 |------|-----|---------|
 | PlotNeuralNet | [GitHub](https://github.com/HarisIqbal88/PlotNeuralNet) | TikZ neural network diagrams |
 | SciencePlots | [GitHub](https://github.com/garrettj403/SciencePlots) | Publication-ready matplotlib |
 | Okabe-Ito Palette | [Reference](https://jfly.uni-koeln.de/color/) | Colorblind-safe colors |
 ### LaTeX Resources
 | Resource | URL | Purpose |
 |----------|-----|---------|
 | Overleaf Templates | [Overleaf](https://www.overleaf.com/latex/templates) | Online LaTeX editor |
 | BibLaTeX Guide | [CTAN](https://ctan.org/pkg/biblatex) | Modern citation management |
 ---
 ## Research on AI Writing & Hallucination
 | Source | URL | Key Finding |
 |--------|-----|-------------|
 | AI Hallucinations in Citations | [Enago](https://www.enago.com/academy/ai-hallucinations-research-citations/) | ~40% error rate |
 | Hallucination in AI Writing | [PMC](https://pmc.ncbi.nlm.nih.gov/articles/PMC10726751/) | Types of citation errors |
 | NeurIPS 2025 AI Report | [ByteIota](https://byteiota.com/neurips-2025-100-ai-hallucinations-slip-through-review/) | 100+ hallucinated citations |
 ---
 ## Quick Reference by Topic
 ### For Narrative & Structure
 → Start with: Neel Nanda, Sebastian Farquhar, Andrej Karpathy
 ### For Sentence-Level Clarity
 → Start with: Gopen & Swan, Ethan Perez, Zachary Lipton
 ### For Word Choice & Style
 → Start with: Zachary Lipton, Jacob Steinhardt
 ### For Conference-Specific Requirements
 → Start with: Official venue guidelines (NeurIPS, ICML, ICLR, ACL)
 ### For Citation Management
 → Start with: Semantic Scholar API, CrossRef, citation-workflow.md
 ### For Reviewer Expectations
 → Start with: Venue reviewer guidelines, reviewer-guidelines.md
--- a/skills/mlops/ml-paper-writing/references/writing-guide.md
+++ b/skills/mlops/ml-paper-writing/references/writing-guide.md
@ -0,0 +1,476 @@
 # ML Paper Writing Philosophy & Best Practices
 This reference compiles writing advice from prominent ML researchers including Neel Nanda, Andrej Karpathy, Sebastian Farquhar, Zachary Lipton, and Jacob Steinhardt.
 ---
 ## Contents
 - [The Narrative Principle](#the-narrative-principle)
 - [Time Allocation](#time-allocation)
 - [Abstract Writing Formula](#abstract-writing-formula)
 - [Introduction Structure](#introduction-structure)
 - [Sentence-Level Clarity](#sentence-level-clarity)
 - [Micro-Level Writing Tips](#micro-level-writing-tips)
 - [Word Choice and Precision](#word-choice-and-precision)
 - [Mathematical Writing](#mathematical-writing)
 - [Figure Design](#figure-design)
 - [Common Mistakes to Avoid](#common-mistakes-to-avoid)
 - [Pre-Submission Checklist](#pre-submission-checklist)
 ---
 ## The Narrative Principle
 ### From Neel Nanda
 "A paper is a short, rigorous, evidence-based technical story with a takeaway readers care about."
 The narrative rests on three pillars that must be crystal clear by the end of your introduction:
 **The "What"**: One to three specific novel claims fitting within a cohesive theme. Vague contributions like "we study X" fail immediately—reviewers need precise, falsifiable claims.
 **The "Why"**: Rigorous empirical evidence that convincingly supports those claims, including strong baselines honestly tuned and experiments that distinguish between competing hypotheses rather than merely showing "decent results."
 **The "So What"**: Why readers should care, connecting your contribution to problems the community recognizes as important.
 ### From Andrej Karpathy
 "A paper is not a random collection of experiments you report on. The paper sells a single thing that was not obvious or present before. The entire paper is organized around this core contribution with surgical precision."
 This applies whether you're presenting a new architecture, a theoretical result, or improved understanding of existing methods—NeurIPS explicitly notes that "originality does not necessarily require an entirely new method."
 **Practical Implication**: If you cannot state your contribution in one sentence, you don't yet have a paper. Everything else—experiments, related work, discussion—exists only to support that core claim.
 ---
 ## Time Allocation
 ### From Neel Nanda
 Spend approximately **the same amount of time** on each of:
 1. The abstract
 2. The introduction
 3. The figures
 4. Everything else combined
 This isn't hyperbole—most reviewers form preliminary judgments before reaching your methods section. Readers encounter your paper in a predictable pattern: **title → abstract → introduction → figures → maybe the rest.**
 ### Reviewer Reading Patterns
 Studies of reviewer behavior show:
 - Abstract is read 100% of the time
 - Introduction is skimmed by 90%+ of reviewers
 - Figures are examined before methods by most reviewers
 - Full methods are read only if interest is established
 **Implication**: Front-load your paper's value. Don't bury the contribution.
 ---
 ## Abstract Writing Formula
 ### Sebastian Farquhar's 5-Sentence Formula
 1. **What you achieved**: "We introduce...", "We prove...", "We demonstrate..."
 2. **Why this is hard and important**
 3. **How you do it** (with specialist keywords for discoverability)
 4. **What evidence you have**
 5. **Your most remarkable number/result**
 ### Example (Good Abstract)
 ```
 We prove that gradient descent on overparameterized neural networks
 converges to global minima at a linear rate. [What]
 This resolves a fundamental question about why deep learning works
 despite non-convex optimization landscapes. [Why hard/important]
 Our proof relies on showing that the Neural Tangent Kernel remains
 approximately constant during training, reducing the problem to
 kernel regression. [How with keywords]
 We validate our theory on CIFAR-10 and ImageNet, showing that
 predicted convergence rates match experiments within 5%. [Evidence]
 This is the first polynomial-time convergence guarantee for
 networks with practical depth and width. [Remarkable result]
 ```
 ### What to Avoid
 From Zachary Lipton: "If the first sentence can be pre-pended to any ML paper, delete it."
 **Delete these openings**:
 - "Large language models have achieved remarkable success..."
 - "Deep learning has revolutionized..."
 - "In recent years, neural networks have..."
 **Start with your specific contribution instead.**
 ---
 ## Introduction Structure
 ### Requirements
 - **1-1.5 pages maximum** (in two-column format)
 - **Methods should start by page 2-3**
 - Must include **2-4 bullet contribution list** (max 1-2 lines each)
 ### Structure Template
 ```markdown
 1. Opening Hook (2-3 sentences)
   - State the problem your paper addresses
   - Why it matters RIGHT NOW
 2. Background/Challenge (1 paragraph)
   - What makes this problem hard?
   - What have others tried? Why is it insufficient?
 3. Your Approach (1 paragraph)
   - What do you do differently?
   - Key insight that enables your contribution
 4. Contribution Bullets (2-4 items)
   - Be specific and falsifiable
   - Each bullet: 1-2 lines maximum
 5. Results Preview (2-3 sentences)
   - Most impressive numbers
   - Scope of evaluation
 6. Paper Organization (optional, 1-2 sentences)
   - "Section 2 presents... Section 3 describes..."
 ```
 ### Contribution Bullets: Good vs Bad
 **Good:**
 - We prove that X converges in O(n log n) time under assumption Y
 - We introduce Z, a 3-layer architecture that reduces memory by 40%
 - We demonstrate that A outperforms B by 15% on benchmark C
 **Bad:**
 - We study the problem of X (not a contribution)
 - We provide extensive experiments (too vague)
 - We make several contributions to the field (says nothing)
 ---
 ## Sentence-Level Clarity
 ### From Gopen & Swan: "The Science of Scientific Writing"
 The seminal 1990 paper by George Gopen and Judith Swan establishes that **readers have structural expectations** about where information appears in prose. Violating these expectations forces readers to spend energy on structure rather than content.
 > "If the reader is to grasp what the writer means, the writer must understand what the reader needs."
 #### The 7 Principles of Reader Expectations
 **Principle 1: Subject-Verb Proximity**
 Keep grammatical subject and verb close together. Anything intervening reads as interruption of lesser importance.
 **Weak**: "The model, which was trained on 100M tokens and fine-tuned on domain-specific data using LoRA with rank 16, achieves state-of-the-art results"
 **Strong**: "The model achieves state-of-the-art results after training on 100M tokens and fine-tuning with LoRA (rank 16)"
 **Principle 2: Stress Position (Save the Best for Last)**
 Readers naturally emphasize the **last words of a sentence**. Place your most important information there.
 **Weak**: "Accuracy improves by 15% when using attention"
 **Strong**: "When using attention, accuracy improves by **15%**"
 **Principle 3: Topic Position (First Things First)**
 The beginning of a sentence establishes perspective. Put the "whose story" element first—readers expect the sentence to be about whoever shows up first.
 **Weak**: "A novel attention mechanism that computes alignment scores is introduced"
 **Strong**: "To address the alignment problem, we introduce a novel attention mechanism"
 **Principle 4: Old Information Before New**
 Put familiar information (old) in the topic position for backward linkage; put new information in the stress position for emphasis.
 **Weak**: "Sparse attention was introduced by Child et al. The quadratic complexity of standard attention motivates this work."
 **Strong**: "Standard attention has quadratic complexity. To address this, Child et al. introduced sparse attention."
 **Principle 5: One Unit, One Function**
 Each unit of discourse (sentence, paragraph, section) should serve a single function. If you have two points, use two units.
 **Principle 6: Articulate Action in the Verb**
 Express the action of each sentence in its verb, not in nominalized nouns.
 **Weak**: "We performed an analysis of the results" (nominalization)
 **Strong**: "We analyzed the results" (action in verb)
 **Principle 7: Context Before New Information**
 Provide context before asking the reader to consider anything new. This applies at all levels—sentence, paragraph, section.
 **Weak**: "Equation 3 shows that convergence is guaranteed when the learning rate satisfies..."
 **Strong**: "For convergence to be guaranteed, the learning rate must satisfy the condition in Equation 3..."
 #### Summary Table
 | Principle | Rule | Mnemonic |
 |-----------|------|----------|
 | Subject-Verb Proximity | Keep subject and verb close | "Don't interrupt yourself" |
 | Stress Position | Emphasis at sentence end | "Save the best for last" |
 | Topic Position | Context at sentence start | "First things first" |
 | Old Before New | Familiar → unfamiliar | "Build on known ground" |
 | One Unit, One Function | Each sentence, paragraph, or section = one point | "One idea per container" |
 | Action in Verb | Use verbs, not nominalizations | "Verbs do, nouns sit" |
 | Context Before New | Explain before presenting | "Set the stage first" |
 ---
 ## Micro-Level Writing Tips
 ### From Ethan Perez (Anthropic)
 These practical micro-level tips improve clarity at the sentence and word level.
 #### Pronoun Management
 **Minimize pronouns** ("this," "it," "these," "that"). When pronouns are necessary, use them as adjectives with a noun:
 **Weak**: "This shows that the model converges."
 **Strong**: "This result shows that the model converges."
 **Weak**: "It improves performance."
 **Strong**: "This modification improves performance."
 #### Verb Placement
 **Position verbs early** in sentences for better parsing:
 **Weak**: "The gradient, after being computed and normalized, updates the weights."
 **Strong**: "The gradient updates the weights after being computed and normalized."
 #### Apostrophe Unfolding
 Transform possessive constructions for clarity:
 **Original**: "X's Y" → **Unfolded**: "The Y of X"
 **Before**: "The model's accuracy on the test set"
 **After**: "The accuracy of the model on the test set"
 This isn't always better, but when sentences feel awkward, try unfolding.
 #### Words to Eliminate
 Delete these filler words in almost all cases:
 - "actually"
 - "a bit"
 - "fortunately" / "unfortunately"
 - "very" / "really"
 - "quite"
 - "basically"
 - "essentially"
 - Excessive connectives ("however," "moreover," "furthermore" when not needed)
 #### Sentence Construction Rules
 1. **One idea per sentence** - If you struggle to express an idea in one sentence, split it into two
 2. **No repeated sounds** - Avoid similar-sounding words in the same sentence
 3. **Every sentence adds information** - Delete sentences that merely restate
 4. **Active voice always** - Specify the actor ("We find..." not "It is found...")
 5. **Expand contractions** - "don't" → "do not" for formality
 #### Paragraph Architecture
 - **First sentence**: State the point clearly
 - **Middle sentences**: Support with evidence
 - **Last sentence**: Reinforce or transition
 Don't bury key information in the middle of paragraphs.
 ---
 ## Word Choice and Precision
 ### From Zachary Lipton
 **Eliminate hedging** unless genuine uncertainty exists:
 - Delete "may" and "can" unless necessary
 - "provides *very* tight approximation" drips with insecurity
 - "provides tight approximation" is confident
 **Avoid vacuous intensifiers**:
 - Delete: very, extremely, highly, significantly (unless statistical)
 - These words signal insecurity, not strength
 ### From Jacob Steinhardt
 **Precision over brevity**: Replace vague terms with specific ones.
 | Vague | Specific |
 |-------|----------|
 | performance | accuracy, latency, throughput |
 | improves | increases accuracy by X%, reduces latency by Y |
 | large | 1B parameters, 100M tokens |
 | fast | 3x faster, 50ms latency |
 | good results | 92% accuracy, 0.85 F1 |
 **Consistent terminology**: Referring to the same concept with different terms creates confusion.
 **Choose one and stick with it**:
 - "model" vs "network" vs "architecture"
 - "training" vs "learning" vs "optimization"
 - "sample" vs "example" vs "instance"
 ### Vocabulary Signaling
 **Avoid words signaling incremental work**:
 - Never: "combine," "modify," "expand," "extend"
 - Instead: "develop," "propose," "introduce"
 **Why**: "We combine X and Y" sounds like you stapled two existing ideas together. "We develop a method that leverages X for Y" sounds like genuine contribution.
 ---
 ## Mathematical Writing
 ### From Ethan Perez
 **Unfold apostrophes** for clarity:
 - Weak: "X's Y"
 - Strong: "The Y of X"
 Example: "the model's accuracy" → "the accuracy of the model"
 ### General Principles
 1. **State all assumptions formally** before theorems
 2. **Provide intuitive explanations** alongside proofs
 3. **Use consistent notation** throughout the paper
 4. **Define symbols at first use**
 ### Notation Conventions
 ```latex
 % Scalars: lowercase italic
 $x$, $y$, $\alpha$, $\beta$
 % Vectors: lowercase bold
 $\mathbf{x}$, $\mathbf{v}$
 % Matrices: uppercase bold
 $\mathbf{W}$, $\mathbf{X}$
 % Sets: uppercase calligraphic
 $\mathcal{X}$, $\mathcal{D}$
 % Functions: roman for named functions
 $\mathrm{softmax}$, $\mathrm{ReLU}$
 ```
 ---
 ## Figure Design
 ### From Neel Nanda
 Figures should tell a coherent story even if the reader skips the text. Many readers DO skip the text initially.
 ### Design Principles
 1. **Figure 1 is crucial**: Often the first thing readers examine after abstract
 2. **Self-contained captions**: Reader should understand figure without main text
 3. **No title inside figure**: The caption serves this function (ICML/NeurIPS rule)
 4. **Vector graphics**: PDF/EPS for plots, PNG (600 DPI) only for photographs
 ### Accessibility Requirements
 8% of men have color vision deficiency. Your figures must work for them.
 **Solutions**:
 - Use colorblind-safe palettes: Okabe-Ito or Paul Tol
 - Avoid red-green combinations
 - Verify figures work in grayscale
 - Use different line styles (solid, dashed, dotted) in addition to colors
 ### Tools
 ```python
 # SciencePlots: Publication-ready styles
 import matplotlib.pyplot as plt
 import scienceplots  # registers the 'science' styles (required for SciencePlots >= 2.0)
 plt.style.use(['science', 'ieee'])
 # Or for Nature-style
 plt.style.use(['science', 'nature'])
 ```
 ---
 ## Common Mistakes to Avoid
 ### Structure Mistakes
 | Mistake | Solution |
 |---------|----------|
 | Introduction too long (>1.5 pages) | Move background to Related Work |
 | Methods buried (after page 3) | Front-load contribution, cut intro |
 | Missing contribution bullets | Add 2-4 specific, falsifiable claims |
 | Experiments without explicit claims | State what each experiment tests |
 ### Writing Mistakes
 | Mistake | Solution |
 |---------|----------|
 | Generic abstract opening | Start with your specific contribution |
 | Inconsistent terminology | Choose one term per concept |
 | Passive voice overuse | Use active voice: "We show" not "It is shown" |
 | Hedging everywhere | Be confident unless genuinely uncertain |
 ### Figure Mistakes
 | Mistake | Solution |
 |---------|----------|
 | Raster graphics for plots | Use vector (PDF/EPS) |
 | Red-green color scheme | Use colorblind-safe palette |
 | Title inside figure | Put title in caption |
 | Captions require main text | Make captions self-contained |
 ### Citation Mistakes
 | Mistake | Solution |
 |---------|----------|
 | Paper-by-paper Related Work | Organize methodologically |
 | Missing relevant citations | Reviewers may have authored the relevant papers—cite generously |
 | AI-generated citations | Always verify via APIs |
 | Inconsistent citation format | Use BibLaTeX with consistent keys |
 ---
 ## Pre-Submission Checklist
 Before submitting, verify:
 **Narrative**:
 - [ ] Can state contribution in one sentence
 - [ ] Three pillars (What/Why/So What) clear in intro
 - [ ] Every experiment supports a specific claim
 **Structure**:
 - [ ] Abstract follows 5-sentence formula
 - [ ] Introduction ≤1.5 pages
 - [ ] Methods start by page 2-3
 - [ ] 2-4 contribution bullets included
 - [ ] Limitations section present
 **Writing**:
 - [ ] Consistent terminology throughout
 - [ ] No generic opening sentences
 - [ ] Hedging removed unless necessary
 - [ ] All figures have self-contained captions
 **Technical**:
 - [ ] All citations verified via API
 - [ ] Error bars included with methodology
 - [ ] Compute resources documented
 - [ ] Code/data availability stated
--- a/skills/mlops/ml-paper-writing/templates/README.md
+++ b/skills/mlops/ml-paper-writing/templates/README.md
@ -0,0 +1,251 @@
 # LaTeX Templates for ML/AI Conferences
 This directory contains official LaTeX templates for major machine learning and AI conferences.
 ---
 ## Compiling LaTeX to PDF
 ### Option 1: VS Code with LaTeX Workshop (Recommended)
 **Setup:**
 1. Install [TeX Live](https://www.tug.org/texlive/) (full distribution recommended)
   - macOS: `brew install --cask mactex`
   - Ubuntu: `sudo apt install texlive-full`
   - Windows: Download from [tug.org/texlive](https://www.tug.org/texlive/)
 2. Install VS Code extension: **LaTeX Workshop** by James Yu
   - Open VS Code → Extensions (Cmd/Ctrl+Shift+X) → Search "LaTeX Workshop" → Install
 **Usage:**
 - Open any `.tex` file in VS Code
 - Save the file (Cmd/Ctrl+S) → Auto-compiles to PDF
 - Click the green play button or use `Cmd/Ctrl+Alt+B` to build
 - View PDF: Click "View LaTeX PDF" icon or `Cmd/Ctrl+Alt+V`
 - Side-by-side view: `Cmd/Ctrl+Alt+V` then drag tab
 **Settings** (add to VS Code `settings.json`):
 ```json
 {
  "latex-workshop.latex.autoBuild.run": "onSave",
  "latex-workshop.view.pdf.viewer": "tab",
  "latex-workshop.latex.recipes": [
    {
      "name": "pdflatex → bibtex → pdflatex × 2",
      "tools": ["pdflatex", "bibtex", "pdflatex", "pdflatex"]
    }
  ]
 }
 ```
 ### Option 2: Command Line
 ```bash
 # Basic compilation
 pdflatex main.tex
 # With bibliography (full workflow)
 pdflatex main.tex
 bibtex main
 pdflatex main.tex
 pdflatex main.tex
 # Using latexmk (handles dependencies automatically)
 latexmk -pdf main.tex
 # Continuous compilation (watches for changes)
 latexmk -pdf -pvc main.tex
 ```
 ### Option 3: Overleaf (Online)
 1. Go to [overleaf.com](https://www.overleaf.com)
 2. New Project → Upload Project → Upload the template folder as ZIP
 3. Edit online with real-time PDF preview
 4. No local installation needed
 ### Option 4: Other IDEs
 | IDE | Extension/Plugin | Notes |
 |-----|------------------|-------|
 | **Cursor** | LaTeX Workshop | Same as VS Code |
 | **Sublime Text** | LaTeXTools | Popular, well-maintained |
 | **Vim/Neovim** | VimTeX | Powerful, keyboard-driven |
 | **Emacs** | AUCTeX | Comprehensive LaTeX environment |
 | **TeXstudio** | Built-in | Dedicated LaTeX IDE |
 | **Texmaker** | Built-in | Cross-platform LaTeX editor |
 ### Troubleshooting Compilation
 **"File not found" errors:**
 ```bash
 # Ensure you're in the template directory
 cd templates/icml2026
 pdflatex example_paper.tex
 ```
 **Bibliography not appearing:**
 ```bash
 # Run bibtex after first pdflatex
 pdflatex main.tex
 bibtex main        # Uses main.aux to find citations
 pdflatex main.tex  # Incorporates bibliography
 pdflatex main.tex  # Resolves references
 ```
 **Missing packages:**
 ```bash
 # TeX Live package manager
 tlmgr install <package-name>
 # Or install full distribution to avoid this
 ```
 ---
 ## Available Templates
 | Conference | Directory | Year | Source |
 |------------|-----------|------|--------|
 | ICML | `icml2026/` | 2026 | [Official ICML](https://icml.cc/Conferences/2026/AuthorInstructions) |
 | ICLR | `iclr2026/` | 2026 | [Official GitHub](https://github.com/ICLR/Master-Template) |
 | NeurIPS | `neurips2025/` | 2025 | Community template |
 | ACL | `acl/` | 2025+ | [Official ACL](https://github.com/acl-org/acl-style-files) |
 | AAAI | `aaai2026/` | 2026 | [AAAI Author Kit](https://aaai.org/authorkit26/) |
 | COLM | `colm2025/` | 2025 | [Official COLM](https://github.com/COLM-org/Template) |
 ## Usage
 ### ICML 2026
 ```latex
 \documentclass{article}
 \usepackage{icml2026}  % For submission
 % \usepackage[accepted]{icml2026}  % For camera-ready
 \begin{document}
 % Your paper content
 \end{document}
 ```
 Key files:
 - `icml2026.sty` - Style file
 - `icml2026.bst` - Bibliography style
 - `example_paper.tex` - Example document
 ### ICLR 2026
 ```latex
 \documentclass{article}
 \usepackage[submission]{iclr2026_conference}  % For submission
 % \usepackage[final]{iclr2026_conference}  % For camera-ready
 \begin{document}
 % Your paper content
 \end{document}
 ```
 Key files:
 - `iclr2026_conference.sty` - Style file
 - `iclr2026_conference.bst` - Bibliography style
 - `iclr2026_conference.tex` - Example document
 ### ACL Venues (ACL, EMNLP, NAACL)
 ```latex
 \documentclass[11pt]{article}
 \usepackage[review]{acl}  % For review
 % \usepackage{acl}  % For camera-ready
 \begin{document}
 % Your paper content
 \end{document}
 ```
 Key files:
 - `acl.sty` - Style file
 - `acl_natbib.bst` - Bibliography style
 - `acl_latex.tex` - Example document
 ### AAAI 2026
 ```latex
 \documentclass[letterpaper]{article}
 \usepackage[submission]{aaai2026}  % For submission
 % \usepackage{aaai2026}  % For camera-ready
 \begin{document}
 % Your paper content
 \end{document}
 ```
 Key files:
 - `aaai2026.sty` - Style file
 - `aaai2026.bst` - Bibliography style
 ### COLM 2025
 ```latex
 \documentclass{article}
 \usepackage[submission]{colm2025_conference}  % For submission
 % \usepackage[final]{colm2025_conference}  % For camera-ready
 \begin{document}
 % Your paper content
 \end{document}
 ```
 Key files:
 - `colm2025_conference.sty` - Style file
 - `colm2025_conference.bst` - Bibliography style
 ## Page Limits Summary
 | Conference | Submission | Camera-Ready | Notes |
 |------------|-----------|--------------|-------|
 | ICML 2026 | 8 pages | 9 pages | +unlimited refs/appendix |
 | ICLR 2026 | 9 pages | 10 pages | +unlimited refs/appendix |
 | NeurIPS 2025 | 9 pages | 9 pages | +checklist outside limit |
 | ACL 2025 | 8 pages (long) | varies | +unlimited refs/appendix |
 | AAAI 2026 | 7 pages | 8 pages | +unlimited refs/appendix |
 | COLM 2025 | 9 pages | 10 pages | +unlimited refs/appendix |
 ## Common Issues
 ### Compilation Errors
 1. **Missing packages**: Install full TeX distribution (TeX Live Full or MiKTeX)
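 2. **Bibliography errors**: Use the provided `.bst` file with `\bibliographystyle{}` (exception: per the AAAI template README, `aaai2026.sty` already sets the style, and adding it again causes an "Illegal, another \bibstyle command" error)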
 3. **Font warnings**: Install `cm-super` or use `\usepackage{lmodern}`
 ### Anonymization
 For submission, ensure:
 - No author names in `\author{}`
 - No acknowledgments section
 - No grant numbers
 - Use anonymous repositories
 - Cite own work in third person
 ### Common LaTeX Packages
 ```latex
 % Recommended packages (check compatibility with venue style)
 \usepackage{amsmath,amsthm,amssymb}  % Math
 \usepackage{graphicx}                 % Figures
 \usepackage{booktabs}                 % Tables
 \usepackage{hyperref}                 % Links
 \usepackage{algorithm,algorithmic}    % Algorithms
 \usepackage{natbib}                   % Citations
 ```
 ## Updating Templates
 Templates are updated annually. Check official sources before each submission:
 - ICML: https://icml.cc/
 - ICLR: https://iclr.cc/
 - NeurIPS: https://neurips.cc/
 - ACL: https://github.com/acl-org/acl-style-files
 - AAAI: https://aaai.org/
 - COLM: https://colmweb.org/
--- a/skills/mlops/ml-paper-writing/templates/aaai2026/README.md
+++ b/skills/mlops/ml-paper-writing/templates/aaai2026/README.md
@ -0,0 +1,534 @@
 # AAAI 2026 统一LaTeX模板使用说明 / AAAI 2026 Unified LaTeX Template Guide
 > **📝 重要说明 / Important Notice**: 本仓库借助Cursor在AAAI 2026官方模板基础上改进得到。如果遇到不满足或有冲突的情况，请积极提issues。
 > 
 > **📝 Important Notice**: This repository is an improved version of the official AAAI 2026 template, created with the assistance of Cursor. If you find anything that does not meet the requirements or conflicts with them, please open an issue.
 [中文](#中文版本) | [English](#english-version)
 ---
 ## 🌐 在线查看 / Online Access
 **📖 在线阅读和测试模板**: [https://cn.overleaf.com/read/wyhcnvcrtpyt#cd4a07](https://cn.overleaf.com/read/wyhcnvcrtpyt#cd4a07)
 **📖 Online View and Test Template**: [https://cn.overleaf.com/read/wyhcnvcrtpyt#cd4a07](https://cn.overleaf.com/read/wyhcnvcrtpyt#cd4a07)
 💡 **提示 / Tips**: 
 - 中文：您可以通过上述链接在Overleaf中直接查看、编辑和编译模板，无需本地安装LaTeX环境
 - English: You can view, edit, and compile the template directly in Overleaf using the link above, without needing a local LaTeX installation
 ---
 ## 中文版本
 ### 概述 ✅
 我已经将AAAI 2026的两个版本（匿名投稿版本和camera-ready版本）**完整合并**成一个统一的模板文件 `aaai2026-unified-template.tex`。
 该模板包含了原始两个模板的**所有完整内容**（共886行，比原始文件更全面），包括：
 - 所有格式化说明和要求
 - 完整的示例代码和表格
 - 图片处理指南
 - 参考文献格式要求
 - 所有章节和附录内容
 - 版本特定的Acknowledgments部分
 ### 主要差异分析
 通过比较原始的两个模板，我发现主要差异在于：
 #### 1. 包的加载方式
 - **匿名版本**: `\usepackage[submission]{aaai2026}`
 - **Camera-ready版本**: `\usepackage{aaai2026}`
 #### 2. 标题差异
 - **匿名版本**: "AAAI Press Anonymous Submission Instructions for Authors Using LaTeX"
 - **Camera-ready版本**: "AAAI Press Formatting Instructions for Authors Using LaTeX --- A Guide"
 #### 3. Links环境的处理
 - **匿名版本**: Links环境被注释掉，防止泄露作者身份
 - **Camera-ready版本**: Links环境正常显示
 #### 4. 内容部分差异
 - **匿名版本**: 包含"Preparing an Anonymous Submission"部分的特殊说明
 - **Camera-ready版本**: 包含完整的格式说明和版权信息
 ### 依赖文件检查结果
 ✅ **已验证并复制到主目录的文件**：
 - `aaai2026.sty` - AAAI 2026 样式文件（两个版本完全相同）
 - `aaai2026.bst` - 参考文献样式文件（两个版本完全相同）
 - `aaai2026.bib` - 示例参考文献文件
 - `figure1.pdf` 和 `figure2.pdf` - 示例图片文件
 所有这些文件在两个版本中都是相同的，因此统一模板可以正常工作。
 ### 如何使用统一模板
 #### 切换到匿名投稿版本
 在模板文件第11行，**取消注释**这一行：
 ```latex
 \def\aaaianonymous{true}
 ```
 #### 切换到Camera-ready版本
 在模板文件第11行，**注释掉**或**删除**这一行：
 ```latex
 % \def\aaaianonymous{true}
 ```
 ### 一键切换的核心机制
 统一模板使用了LaTeX的条件编译功能：
 ```latex
 % 条件包加载
 \ifdefined\aaaianonymous
    \usepackage[submission]{aaai2026}  % 匿名版本
 \else
    \usepackage{aaai2026}              % Camera-ready版本
 \fi
 % 条件标题设置
 \ifdefined\aaaianonymous
    \title{AAAI Press Anonymous Submission\\Instructions for Authors Using \LaTeX{}}
 \else
    \title{AAAI Press Formatting Instructions \\for Authors Using \LaTeX{} --- A Guide}
 \fi
 % 条件内容显示
 \ifdefined\aaaianonymous
    % 匿名版本特有内容
 \else
    % Camera-ready版本特有内容
 \fi
 ```
 ### 文件清单
 主目录现在包含以下文件：
 - `aaai2026-unified-template.tex` - 统一主论文模板文件
 - `aaai2026-unified-supp.tex` - 统一补充材料模板文件
 - `aaai2026.sty` - AAAI 2026 LaTeX 样式文件
 - `aaai2026.bst` - 参考文献样式文件  
 - `aaai2026.bib` - 示例参考文献文件
 - `figure1.pdf` - 示例图片1
 - `figure2.pdf` - 示例图片2
 - `README.md` - 本说明文档
 ### 补充材料模板 (Supplementary Material Template)
 #### 概述
 `aaai2026-unified-supp.tex` 是专门为AAAI 2026补充材料设计的统一模板，与主论文模板使用相同的版本切换机制。
 #### 主要功能
 - **版本切换**: 通过修改一行代码在匿名投稿和camera-ready版本间切换
 - **补充内容支持**: 支持额外的实验、推导、数据、图表、算法等
 - **格式一致性**: 与主论文模板保持完全一致的格式要求
 - **代码示例**: 包含算法、代码列表等补充材料的示例
 #### 使用方法
 与主论文模板相同，只需修改第11行：
 ```latex
 % 匿名投稿版本
 \def\aaaianonymous{true}
 % Camera-ready版本  
 % \def\aaaianonymous{true}
 ```
 #### 补充材料内容建议
 - 额外的实验结果和消融研究
 - 详细的数学推导和证明
 - 更多的图表和可视化
 - 算法伪代码和实现细节
 - 数据集描述和预处理步骤
 - 超参数设置和实验配置
 - 失败案例分析
 - 计算复杂度分析
 ### 使用检查清单 (Usage Checklist)
 #### 📋 投稿前检查清单 (Pre-Submission Checklist)
 **版本设置**:
 - [ ] 已设置 `\def\aaaianonymous{true}` (匿名投稿)
 - [ ] 已注释掉所有可能暴露身份的信息
 - [ ] 已匿名化参考文献（移除作者姓名）
 **内容完整性**:
 - [ ] 标题、摘要、关键词已填写
 - [ ] 所有章节内容完整
 - [ ] 图表编号连续且正确
 - [ ] 参考文献格式正确
 - [ ] 补充材料（如有）已准备
 **格式检查**:
 - [ ] 页面边距符合要求
 - [ ] 字体和字号正确
 - [ ] 行间距符合标准
 - [ ] 图表位置和大小合适
 - [ ] 数学公式格式正确
 **技术检查**:
 - [ ] LaTeX编译无错误
 - [ ] 参考文献正确生成
 - [ ] PDF输出正常
 - [ ] 文件大小在限制范围内
 #### 📋 录用后检查清单 (Post-Acceptance Checklist)
 **版本切换**:
 - [ ] 已注释掉 `\def\aaaianonymous{true}` (camera-ready)
 - [ ] 已添加完整的作者信息
 - [ ] 已添加所有作者单位信息
 - [ ] 已恢复所有被注释的内容
 **内容更新**:
 - [ ] 已根据审稿意见修改内容
 - [ ] 已更新所有图表和实验
 - [ ] 已完善补充材料
 - [ ] 已检查所有链接和引用
 **最终检查**:
 - [ ] 最终PDF质量检查
 - [ ] 所有文件已备份
 - [ ] 符合会议最终提交要求
 - [ ] 补充材料已单独提交（如需要）
 #### 📋 补充材料检查清单 (Supplementary Material Checklist)
 **内容组织**:
 - [ ] 补充材料与主论文内容对应
 - [ ] 章节结构清晰合理
 - [ ] 图表编号与主论文不冲突
 - [ ] 参考文献格式一致
 **技术细节**:
 - [ ] 算法伪代码清晰完整
 - [ ] 实验设置详细说明
 - [ ] 数据预处理步骤明确
 - [ ] 超参数配置完整
 **格式要求**:
 - [ ] 使用统一的supp模板
 - [ ] 页面设置与主论文一致
 - [ ] 字体和格式符合要求
 - [ ] 文件大小在限制范围内
 ### 实际使用建议
 1. **投稿阶段**: 
   - 取消注释 `\def\aaaianonymous{true}` 
   - 确保不包含任何可能暴露身份的信息
   - 检查参考文献是否已匿名化
 2. **录用后准备final版本**:
   - 注释掉或删除 `\def\aaaianonymous{true}` 这一行
   - 添加完整的作者信息和affiliations
   - 取消注释links环境（如果需要）
 3. **编译测试**:
   - 分别在两种模式下编译，确保都能正常工作
   - 检查输出的PDF是否符合要求
   - 验证参考文献格式是否正确
 4. **依赖文件确认**:
   - 确保所有依赖文件都在同一目录下
   - 如果移动模板文件，记得同时移动依赖文件
 ### 重要注意事项
 ⚠️ **关于Bibliography Style**:
 - `aaai2026.sty`文件已经自动设置了`\bibliographystyle{aaai2026}`
 - **不要**在文档中再次添加`\bibliographystyle{aaai2026}`命令
 - 否则会出现"`Illegal, another \bibstyle command`"错误
 - 只需要使用`\bibliography{aaai2026}`命令即可
 ### 编译命令示例
 ```bash
 # 编译LaTeX文档
 pdflatex aaai2026-unified-template.tex
 bibtex aaai2026-unified-template
 pdflatex aaai2026-unified-template.tex
 pdflatex aaai2026-unified-template.tex
 ```
 ### 常见问题解决
 #### 1. "Illegal, another \bibstyle command"错误
 **原因**: 重复设置了bibliography style  
 **解决方案**: 删除文档中的`\bibliographystyle{aaai2026}`命令，`aaai2026.sty`会自动处理
 #### 2. 参考文献格式不正确
 **原因**: 可能缺少natbib包或者BibTeX文件问题  
 **解决方案**: 确保按照标准的LaTeX编译流程：pdflatex → bibtex → pdflatex → pdflatex
 ---
 ## English Version
 ### Overview ✅
 I have **completely merged** the two AAAI 2026 versions (anonymous submission and camera-ready) into a single unified template file `aaai2026-unified-template.tex`.
 This template contains **all complete content** from both original templates (886 lines total, more comprehensive than the original files), including:
 - All formatting instructions and requirements
 - Complete example code and tables
 - Image processing guidelines
 - Reference formatting requirements
 - All sections and appendix content
 - Version-specific Acknowledgments sections
 ### Key Differences Analysis
 Comparing the two original templates reveals the following main differences:
 #### 1. Package Loading Method
 - **Anonymous version**: `\usepackage[submission]{aaai2026}`
 - **Camera-ready version**: `\usepackage{aaai2026}`
 #### 2. Title Differences
 - **Anonymous version**: "AAAI Press Anonymous Submission Instructions for Authors Using LaTeX"
 - **Camera-ready version**: "AAAI Press Formatting Instructions for Authors Using LaTeX --- A Guide"
 #### 3. Links Environment Handling
 - **Anonymous version**: Links environment commented out to prevent identity disclosure
 - **Camera-ready version**: Links environment displayed normally
 #### 4. Content Section Differences
 - **Anonymous version**: Contains special instructions in "Preparing an Anonymous Submission" section
 - **Camera-ready version**: Contains complete formatting instructions and copyright information
 ### Dependency Files Verification
 ✅ **Files verified and copied to main directory**:
 - `aaai2026.sty` - AAAI 2026 style file (identical in both versions)
 - `aaai2026.bst` - Bibliography style file (identical in both versions)
 - `aaai2026.bib` - Sample bibliography file
 - `figure1.pdf` and `figure2.pdf` - Sample image files
 All these files are identical in both versions, so the unified template works properly.
 ### How to Use the Unified Template
 #### Switch to Anonymous Submission Version
 On line 11 of the template file, **uncomment** this line:
 ```latex
 \def\aaaianonymous{true}
 ```
 #### Switch to Camera-ready Version
 On line 11 of the template file, **comment out** or **delete** this line:
 ```latex
 % \def\aaaianonymous{true}
 ```
 ### Core Mechanism of One-Click Switching
 The unified template uses LaTeX conditional compilation:
 ```latex
 % Conditional package loading
 \ifdefined\aaaianonymous
    \usepackage[submission]{aaai2026}  % Anonymous version
 \else
    \usepackage{aaai2026}              % Camera-ready version
 \fi
 % Conditional title setting
 \ifdefined\aaaianonymous
    \title{AAAI Press Anonymous Submission\\Instructions for Authors Using \LaTeX{}}
 \else
    \title{AAAI Press Formatting Instructions \\for Authors Using \LaTeX{} --- A Guide}
 \fi
 % Conditional content display
 \ifdefined\aaaianonymous
    % Anonymous version specific content
 \else
    % Camera-ready version specific content
 \fi
 ```
 ### File List
 The main directory now contains the following files:
 - `aaai2026-unified-template.tex` - Unified main paper template file
 - `aaai2026-unified-supp.tex` - Unified supplementary material template file
 - `aaai2026.sty` - AAAI 2026 LaTeX style file
 - `aaai2026.bst` - Bibliography style file
 - `aaai2026.bib` - Sample bibliography file
 - `figure1.pdf` - Sample image 1
 - `figure2.pdf` - Sample image 2
 - `README.md` - This documentation
 ### Supplementary Material Template
 #### Overview
 `aaai2026-unified-supp.tex` is a unified template specifically designed for AAAI 2026 supplementary materials, using the same version switching mechanism as the main paper template.
 #### Key Features
 - **Version Switching**: Switch between anonymous submission and camera-ready versions by modifying one line of code
 - **Supplementary Content Support**: Supports additional experiments, derivations, data, figures, algorithms, etc.
 - **Format Consistency**: Maintains complete format consistency with the main paper template
 - **Code Examples**: Includes examples for algorithms, code listings, and other supplementary materials
 #### Usage
 Same as the main paper template, just modify line 11:
 ```latex
 % Anonymous submission version
 \def\aaaianonymous{true}
 % Camera-ready version
 % \def\aaaianonymous{true}
 ```
 #### Supplementary Material Content Suggestions
 - Additional experimental results and ablation studies
 - Detailed mathematical derivations and proofs
 - More figures and visualizations
 - Algorithm pseudocode and implementation details
 - Dataset descriptions and preprocessing steps
 - Hyperparameter settings and experimental configurations
 - Failure case analysis
 - Computational complexity analysis
 ### Usage Checklist
 #### 📋 Pre-Submission Checklist
 **Version Setup**:
 - [ ] Set `\def\aaaianonymous{true}` (anonymous submission)
 - [ ] Commented out all information that could reveal identity
 - [ ] Anonymized references (removed author names)
 **Content Completeness**:
 - [ ] Title, abstract, and keywords filled
 - [ ] All sections complete
 - [ ] Figure and table numbers consecutive and correct
 - [ ] Reference format correct
 - [ ] Supplementary materials prepared (if any)
 **Format Check**:
 - [ ] Page margins meet requirements
 - [ ] Font and font size correct
 - [ ] Line spacing meets standards
 - [ ] Figure and table positions and sizes appropriate
 - [ ] Mathematical formula format correct
 **Technical Check**:
 - [ ] LaTeX compilation error-free
 - [ ] References generated correctly
 - [ ] PDF output normal
 - [ ] File size within limits
 #### 📋 Post-Acceptance Checklist
 **Version Switch**:
 - [ ] Commented out `\def\aaaianonymous{true}` (camera-ready)
 - [ ] Added complete author information
 - [ ] Added all author affiliation information
 - [ ] Restored all commented content
 **Content Updates**:
 - [ ] Modified content according to reviewer comments
 - [ ] Updated all figures and experiments
 - [ ] Completed supplementary materials
 - [ ] Checked all links and citations
 **Final Check**:
 - [ ] Final PDF quality check
 - [ ] All files backed up
 - [ ] Meets conference final submission requirements
 - [ ] Supplementary materials submitted separately (if needed)
 #### 📋 Supplementary Material Checklist
 **Content Organization**:
 - [ ] Supplementary materials correspond to main paper content
 - [ ] Chapter structure clear and reasonable
 - [ ] Figure and table numbers don't conflict with main paper
 - [ ] Reference format consistent
 **Technical Details**:
 - [ ] Algorithm pseudocode clear and complete
 - [ ] Experimental setup explained in detail
 - [ ] Data preprocessing steps clear
 - [ ] Hyperparameter configuration complete
 **Format Requirements**:
 - [ ] Using unified supp template
 - [ ] Page settings consistent with main paper
 - [ ] Font and format meet requirements
 - [ ] File size within limits
 ### Practical Usage Recommendations
 1. **Submission Stage**: 
   - Uncomment `\def\aaaianonymous{true}` 
   - Ensure no information that could reveal identity is included
   - Check that references are anonymized
 2. **Preparing final version after acceptance**:
   - Comment out or delete the `\def\aaaianonymous{true}` line
   - Add complete author information and affiliations
   - Uncomment links environment (if needed)
 3. **Compilation Testing**:
   - Compile in both modes to ensure proper functionality
   - Check if the output PDF meets requirements
   - Verify reference formatting is correct
 4. **Dependency File Confirmation**:
   - Ensure all dependency files are in the same directory
   - Remember to move dependency files when moving the template file
 ### Important Notes
 ⚠️ **About Bibliography Style**:
 - The `aaai2026.sty` file automatically sets `\bibliographystyle{aaai2026}`
 - **Do NOT** add `\bibliographystyle{aaai2026}` command again in your document
 - Otherwise you'll get "`Illegal, another \bibstyle command`" error
 - Just use the `\bibliography{aaai2026}` command
 ### Compilation Commands Example
 ```bash
 # Compile LaTeX document
 pdflatex aaai2026-unified-template.tex
 bibtex aaai2026-unified-template
 pdflatex aaai2026-unified-template.tex
 pdflatex aaai2026-unified-template.tex
 ```
 ### Common Issues and Solutions
 #### 1. "Illegal, another \bibstyle command" Error
 **Cause**: Duplicate bibliography style setting  
 **Solution**: Remove the `\bibliographystyle{aaai2026}` command from your document; `aaai2026.sty` sets the bibliography style automatically.
 #### 2. Incorrect Reference Format
 **Cause**: Missing natbib package or BibTeX file issues  
 **Solution**: Follow the standard LaTeX compilation process: pdflatex → bibtex → pdflatex → pdflatex
 ---
 ## 版本信息 / Version Information
 - **模板版本 / Template Version**: AAAI 2026 Unified (Main + Supplementary)
 - **创建日期 / Created**: 2024年12月
 - **支持格式 / Supported Formats**: Anonymous Submission & Camera-Ready
 - **模板类型 / Template Types**: Main Paper Template & Supplementary Material Template
 - **兼容性 / Compatibility**: LaTeX 2020+ / TeXLive 2024+
 ---
 🎉 **现在您只需要修改一行代码就可以在两个版本之间切换，同时所有必要的依赖文件都已经准备就绪！**  
 🎉 **Now you only need to modify one line of code to switch between the two versions, with all necessary dependency files ready to use!**
--- a/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026-unified-supp.tex
+++ b/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026-unified-supp.tex
@ -0,0 +1,144 @@
 %File: aaai2026-unified-supp.tex
 %
 % UNIFIED AAAI 2026 SUPPLEMENTARY MATERIAL TEMPLATE
 % To switch between anonymous submission and camera-ready versions,
 % simply change the next line:
 %
 % For ANONYMOUS SUBMISSION: uncomment the next line
 % \def\aaaianonymous{true}
 %
 % For CAMERA-READY VERSION: comment out or delete the next line
 % \def\aaaianonymous{true}
 %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \documentclass[letterpaper]{article} % DO NOT CHANGE THIS
 % Conditional package loading based on version
 \ifdefined\aaaianonymous
    \usepackage[submission]{aaai2026}  % Anonymous submission version
 \else
    \usepackage{aaai2026}              % Camera-ready version
 \fi
 \usepackage{times}  % DO NOT CHANGE THIS
 \usepackage{helvet}  % DO NOT CHANGE THIS
 \usepackage{courier}  % DO NOT CHANGE THIS
 \usepackage[hyphens]{url}  % DO NOT CHANGE THIS
 \usepackage{graphicx} % DO NOT CHANGE THIS
 \urlstyle{rm} % DO NOT CHANGE THIS
 \def\UrlFont{\rm}  % DO NOT CHANGE THIS
 \usepackage{natbib}  % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
 \usepackage{caption} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
 \frenchspacing  % DO NOT CHANGE THIS
 \setlength{\pdfpagewidth}{8.5in} % DO NOT CHANGE THIS
 \setlength{\pdfpageheight}{11in} % DO NOT CHANGE THIS
 % These are recommended to typeset algorithms but not required.
 \usepackage{algorithm}
 \usepackage{algorithmic}
 % These are recommended to typeset listings but not required.
 \usepackage{newfloat}
 \usepackage{listings}
 \DeclareCaptionStyle{ruled}{labelfont=normalfont,labelsep=colon,strut=off} % DO NOT CHANGE THIS
 \lstset{% 
 	basicstyle={\footnotesize\ttfamily},
 	numbers=left,numberstyle=\footnotesize,xleftmargin=2em,
 	aboveskip=0pt,belowskip=0pt,
 	showstringspaces=false,tabsize=2,breaklines=true}
 \floatstyle{ruled}
 \newfloat{listing}{tb}{lst}{}
 \floatname{listing}{Listing}
 \pdfinfo{
 /TemplateVersion (2026.1)
 }
 \setcounter{secnumdepth}{0} %May be changed to 1 or 2 if section numbers are desired.
 % Title - conditionally set based on version
 \ifdefined\aaaianonymous
    \title{AAAI 2026 Supplementary Material\\Anonymous Submission}
 \else
    \title{AAAI 2026 Supplementary Material\\Camera Ready}
 \fi
 % Author and affiliation information
 \ifdefined\aaaianonymous
 \author{
    Anonymous Submission
 }
 \affiliations{
    % Leave affiliations empty for anonymous submission
 }
 \else
 \author{
    %Authors
    Written by AAAI Press Staff\textsuperscript{\rm 1}\thanks{With help from the AAAI Publications Committee.}\\
    AAAI Style Contributions by Peter Patel-Schneider,
    Sunil Issar,\\
    J. Scott Penberthy,
    George Ferguson,
    Hans Guesgen,
    Francisco Cruz\equalcontrib,
    Marc Pujol-Gonzalez\equalcontrib
 }
 \affiliations{
    \textsuperscript{\rm 1}Association for the Advancement of Artificial Intelligence\\
    1101 Pennsylvania Ave, NW Suite 300\\
    Washington, DC 20004 USA\\
    proceedings-questions@aaai.org
 }
 \fi
 \begin{document}
 \maketitle
 \begin{abstract}
 This document provides supplementary material for the main paper, including additional experiments, derivations, data, figures, algorithms, and other relevant content. Please add detailed information as needed. This supplementary material is submitted together with the main paper to further support and complement the main findings.
 \end{abstract}
 % ----------- Supplementary Content Starts Here -----------
 \section{Example Supplementary Content}
 This is the main body of the supplementary material. You may add extra experimental results, ablation studies, detailed derivations, additional figures, pseudocode, dataset descriptions, etc.
 \subsection{Additional Experiments}
 % Example: Insert a figure
 % Uncomment and modify the following lines to add your own figures:
 % \begin{figure}[h]
 % \centering
 % \includegraphics[width=0.9\columnwidth]{your-figure-name}
 % \caption{Your figure caption here.}
 % \label{fig:supp1}
 % \end{figure}
 \subsection{Detailed Derivations}
 You may provide detailed mathematical derivations, proofs, or other technical details here.
 \subsection{Pseudocode}
 \begin{algorithm}[h]
 \caption{Example Supplementary Algorithm}
 \begin{algorithmic}[1]
 \STATE Initialize parameters
 \FOR{each sample}
    \STATE Compute loss
    \STATE Update parameters
 \ENDFOR
 \STATE \textbf{return} optimal parameters
 \end{algorithmic}
 \end{algorithm}
 % ----------- Supplementary Content Ends Here -----------
 % References and End of Paper
 % These lines must be placed at the end of your paper
 \bibliography{aaai2026}
 \end{document} 
--- a/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026-unified-template.tex
+++ b/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026-unified-template.tex
@ -0,0 +1,952 @@
 %File: aaai2026-unified-template.tex
 %
 % UNIFIED AAAI 2026 TEMPLATE 
 % To switch between anonymous submission and camera-ready versions,
 % simply change the next line:
 %
 % For ANONYMOUS SUBMISSION: uncomment the next line
 % \def\aaaianonymous{true}
 %
 % For CAMERA-READY VERSION: comment out or delete the next line
 % \def\aaaianonymous{true}
 %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \documentclass[letterpaper]{article} % DO NOT CHANGE THIS
 % Conditional package loading based on version
 \ifdefined\aaaianonymous
    \usepackage[submission]{aaai2026}  % Anonymous submission version
 \else
    \usepackage{aaai2026}              % Camera-ready version
 \fi
 \usepackage{times}  % DO NOT CHANGE THIS
 \usepackage{helvet}  % DO NOT CHANGE THIS
 \usepackage{courier}  % DO NOT CHANGE THIS
 \usepackage[hyphens]{url}  % DO NOT CHANGE THIS
 \usepackage{graphicx} % DO NOT CHANGE THIS
 \urlstyle{rm} % DO NOT CHANGE THIS
 \def\UrlFont{\rm}  % DO NOT CHANGE THIS
 \usepackage{natbib}  % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
 \usepackage{caption} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
 \frenchspacing  % DO NOT CHANGE THIS
 \setlength{\pdfpagewidth}{8.5in} % DO NOT CHANGE THIS
 \setlength{\pdfpageheight}{11in} % DO NOT CHANGE THIS
 %
 % These are recommended to typeset algorithms but not required. See the subsubsection on algorithms. Remove them if you don't have algorithms in your paper.
 \usepackage{algorithm}
 \usepackage{algorithmic}
 %
 % These are recommended to typeset listings but not required. See the subsubsection on listing. Remove this block if you don't have listings in your paper.
 \usepackage{newfloat}
 \usepackage{listings}
 \DeclareCaptionStyle{ruled}{labelfont=normalfont,labelsep=colon,strut=off} % DO NOT CHANGE THIS
 \lstset{%
 	basicstyle={\footnotesize\ttfamily},% footnotesize acceptable for monospace
 	numbers=left,numberstyle=\footnotesize,xleftmargin=2em,% show line numbers, remove this entire line if you don't want the numbers.
 	aboveskip=0pt,belowskip=0pt,%
 	showstringspaces=false,tabsize=2,breaklines=true}
 \floatstyle{ruled}
 \newfloat{listing}{tb}{lst}{}
 \floatname{listing}{Listing}
 %
 % Keep the \pdfinfo as shown here. There's no need
 % for you to add the /Title and /Author tags.
 \pdfinfo{
 /TemplateVersion (2026.1)
 }
 % DISALLOWED PACKAGES
 % \usepackage{authblk} -- This package is specifically forbidden
 % \usepackage{balance} -- This package is specifically forbidden
 % \usepackage{color} (if used in text)
 % \usepackage{CJK} -- This package is specifically forbidden
 % \usepackage{float} -- This package is specifically forbidden
 % \usepackage{flushend} -- This package is specifically forbidden
 % \usepackage{fontenc} -- This package is specifically forbidden
 % \usepackage{fullpage} -- This package is specifically forbidden
 % \usepackage{geometry} -- This package is specifically forbidden
 % \usepackage{grffile} -- This package is specifically forbidden
 % \usepackage{hyperref} -- This package is specifically forbidden
 % \usepackage{navigator} -- This package is specifically forbidden
 % (or any other package that embeds links such as navigator or hyperref)
 % \usepackage{indentfirst} -- This package is specifically forbidden
 % \usepackage{layout} -- This package is specifically forbidden
 % \usepackage{multicol} -- This package is specifically forbidden
 % \usepackage{nameref} -- This package is specifically forbidden
 % \usepackage{savetrees} -- This package is specifically forbidden
 % \usepackage{setspace} -- This package is specifically forbidden
 % \usepackage{stfloats} -- This package is specifically forbidden
 % \usepackage{tabu} -- This package is specifically forbidden
 % \usepackage{titlesec} -- This package is specifically forbidden
 % \usepackage{tocbibind} -- This package is specifically forbidden
 % \usepackage{ulem} -- This package is specifically forbidden
 % \usepackage{wrapfig} -- This package is specifically forbidden
 % DISALLOWED COMMANDS
 % \nocopyright -- Your paper will not be published if you use this command
 % \addtolength -- This command may not be used
 % \balance -- This command may not be used
 % \baselinestretch -- Your paper will not be published if you use this command
 % \clearpage -- No page breaks of any kind may be used for the final version of your paper
 % \columnsep -- This command may not be used
 % \newpage -- No page breaks of any kind may be used for the final version of your paper
 % \pagebreak -- No page breaks of any kind may be used for the final version of your paper
 % \pagestyle -- This command may not be used
 % \tiny -- This is not an acceptable font size.
 % \vspace{- -- No negative value may be used in proximity of a caption, figure, table, section, subsection, subsubsection, or reference
 % \vskip{- -- No negative value may be used to alter spacing above or below a caption, figure, table, section, subsection, subsubsection, or reference
 \setcounter{secnumdepth}{0} %May be changed to 1 or 2 if section numbers are desired.
 % The file aaai2026.sty is the style file for AAAI Press
 % proceedings, working notes, and technical reports.
 %
 % Title - conditionally set based on version
 \ifdefined\aaaianonymous
    \title{AAAI Press Anonymous Submission\\Instructions for Authors Using \LaTeX{}}
 \else
    \title{AAAI Press Formatting Instructions \\for Authors Using \LaTeX{} --- A Guide}
 \fi
 % Author and affiliation information
 \author{
    %Authors
    % All authors must be in the same font size and format.
    Written by AAAI Press Staff\textsuperscript{\rm 1}\thanks{With help from the AAAI Publications Committee.}\\
    AAAI Style Contributions by Peter Patel-Schneider,
    Sunil Issar,\\
    J. Scott Penberthy,
    George Ferguson,
    Hans Guesgen,
    Francisco Cruz\equalcontrib,
    Marc Pujol-Gonzalez\equalcontrib
 }
 \affiliations{
    %Affiliations
    \textsuperscript{\rm 1}Association for the Advancement of Artificial Intelligence\\
    % If you have multiple authors and multiple affiliations
    % use superscripts in text and roman font to identify them.
    % For example,
    % Sunil Issar\textsuperscript{\rm 2},
    % J. Scott Penberthy\textsuperscript{\rm 3},
    % George Ferguson\textsuperscript{\rm 4},
    % Hans Guesgen\textsuperscript{\rm 5}
    % Note that the comma should be placed after the superscript
    1101 Pennsylvania Ave, NW Suite 300\\
    Washington, DC 20004 USA\\
    % email address must be in roman text type, not monospace or sans serif
    proceedings-questions@aaai.org
 %
 % See more examples next
 }
 %Example, Single Author, ->> remove \iffalse,\fi and place them surrounding AAAI title to use it
 \iffalse
 \title{My Publication Title --- Single Author}
 \author {
    Author Name
 }
 \affiliations{
    Affiliation\\
    Affiliation Line 2\\
    name@example.com
 }
 \fi
 \iffalse
 %Example, Multiple Authors, ->> remove \iffalse,\fi and place them surrounding AAAI title to use it
 \title{My Publication Title --- Multiple Authors}
 \author {
    % Authors
    First Author Name\textsuperscript{\rm 1},
    Second Author Name\textsuperscript{\rm 2},
    Third Author Name\textsuperscript{\rm 1}
 }
 \affiliations {
    % Affiliations
    \textsuperscript{\rm 1}Affiliation 1\\
    \textsuperscript{\rm 2}Affiliation 2\\
    firstAuthor@affiliation1.com, secondAuthor@affiliation2.com, thirdAuthor@affiliation1.com
 }
 \fi
 % REMOVE THIS: bibentry
 % This is only needed to show inline citations in the guidelines document. You should not need it and can safely delete it.
 \usepackage{bibentry}
 % END REMOVE bibentry
 \begin{document}
 \maketitle
 \begin{abstract}
 AAAI creates proceedings, working notes, and technical reports directly from electronic source furnished by the authors. To ensure that all papers in the publication have a uniform appearance, authors must adhere to the following instructions.
 \end{abstract}
 % Links section - only shown in camera-ready version
 \ifdefined\aaaianonymous
 % Uncomment the following to link to your code, datasets, an extended version or similar.
 % You must keep this block between (not within) the abstract and the main body of the paper.
 % NOTE: For anonymous submissions, do not include links that could reveal your identity
 % \begin{links}
 %     \link{Code}{https://aaai.org/example/code}
 %     \link{Datasets}{https://aaai.org/example/datasets}
 %     \link{Extended version}{https://aaai.org/example/extended-version}
 % \end{links}
 \else
 % Uncomment the following to link to your code, datasets, an extended version or similar.
 % You must keep this block between (not within) the abstract and the main body of the paper.
 \begin{links}
    \link{Code}{https://aaai.org/example/code}
    \link{Datasets}{https://aaai.org/example/datasets}
    \link{Extended version}{https://aaai.org/example/extended-version}
 \end{links}
 \fi
 % Version-specific content
 \ifdefined\aaaianonymous
 \section{Preparing an Anonymous Submission}
 This document details the formatting requirements for anonymous submissions. The requirements are the same as for camera ready papers but with a few notable differences:
 \begin{itemize}
    \item Anonymous submissions must not include the author names and affiliations. Write ``Anonymous Submission'' as the ``sole author'' and leave the affiliations empty.
    \item The PDF document's metadata should be cleared with a metadata-cleaning tool before submitting it. This is to prevent leaked information from revealing your identity.
    \item References must be anonymized whenever the reader can infer that they are to the authors' previous work.
    \item AAAI's copyright notice should not be included as a footer in the first page.
    \item Only the PDF version is required at this stage. No source versions will be requested, nor any copyright transfer form.
 \end{itemize}
 You can remove the copyright notice and ensure that your names aren't shown by including \texttt{submission} option when loading the \texttt{aaai2026} package:
 \begin{quote}\begin{scriptsize}\begin{verbatim}
 \documentclass[letterpaper]{article}
 \usepackage[submission]{aaai2026}
 \end{verbatim}\end{scriptsize}\end{quote}
 The remainder of this document consists of the original camera-ready instructions. Any contradiction of the above points ought to be ignored while preparing anonymous submissions.
 \section{Camera-Ready Guidelines}
 \else
 \section{Introduction}
 \fi
 Congratulations on having a paper selected for inclusion in an AAAI Press proceedings or technical report! This document details the requirements necessary to get your accepted paper published using PDF\LaTeX{}. If you are using Microsoft Word, instructions are provided in a different document. AAAI Press does not support any other formatting software.
 The instructions herein are provided as a general guide for experienced \LaTeX{} users. If you do not know how to use \LaTeX{}, please obtain assistance locally. AAAI cannot provide you with support and the accompanying style files are \textbf{not} guaranteed to work. If the results you obtain are not in accordance with the specifications you received, you must correct your source file to achieve the correct result.
 These instructions are generic. Consequently, they do not include specific dates, page charges, and so forth. Please consult your specific written conference instructions for details regarding your submission. Please review the entire document for specific instructions that might apply to your particular situation. All authors must comply with the following:
 \begin{itemize}
 \item You must use the 2026 AAAI Press \LaTeX{} style file and the aaai2026.bst bibliography style files, which are located in the 2026 AAAI Author Kit (aaai2026.sty, aaai2026.bst).
 \item You must complete, sign, and return by the deadline the AAAI copyright form (unless directed by AAAI Press to use the AAAI Distribution License instead).
 \item You must read and format your paper source and PDF according to the formatting instructions for authors.
 \item You must submit your electronic files and abstract using our electronic submission form \textbf{on time.}
 \item You must pay any required page or formatting charges to AAAI Press so that they are received by the deadline.
 \item You must check your paper before submitting it, ensuring that it compiles without error, and complies with the guidelines found in the AAAI Author Kit.
 \end{itemize}
 \ifdefined\aaaianonymous
 \else
 \section{Copyright}
 All papers submitted for publication by AAAI Press must be accompanied by a valid signed copyright form. They must also contain the AAAI copyright notice at the bottom of the first page of the paper. There are no exceptions to these requirements. If you fail to provide us with a signed copyright form or disable the copyright notice, we will be unable to publish your paper. There are \textbf{no exceptions} to this policy. You will find a PDF version of the AAAI copyright form in the AAAI AuthorKit. Please see the specific instructions for your conference for submission details.
 \fi
 \section{Formatting Requirements in Brief}
 We need source and PDF files that can be used in a variety of ways and can be output on a variety of devices. The design and appearance of the paper is \ifdefined\aaaianonymous governed by the aaai2026.sty file (aaai2026.bst for the bibliography style).\else strictly governed by the aaai style file (aaai2026.sty).\fi
 \ifdefined\aaaianonymous
 \begin{itemize}
 \item You must not modify the aaai2026.sty file or change the TeX commands.
 \item You must not use any commands that alter the layout or formatting of your document (i.e., you cannot change the default margins, line spacing, etc.).
 \item You may include other font size changes, color changes, or other formatting commands in your own source, but the paper has to be able to compile, and the styling commands are ignored.
 \end{itemize}
 \else
 \textbf{You must not make any changes to the aaai style file, nor use any commands, packages, style files, or macros within your own paper that alter that design, including, but not limited to spacing, floats, margins, fonts, font size, and appearance.} AAAI imposes requirements on your source and PDF files that must be followed. Most of these requirements are based on our efforts to standardize conference manuscript properties and layout. All papers submitted to AAAI for publication will be recompiled for standardization purposes. Consequently, every paper submission must comply with the following requirements:
 \begin{itemize}
 \item Your .tex file must compile in PDF\LaTeX{} --- (you may not include .ps or .eps figure files.)
 \item All fonts must be embedded in the PDF file --- including your figures.
 \item Modifications to the style file, whether directly or via commands in your document may not ever be made, most especially when made in an effort to avoid extra page charges or make your paper fit in a specific number of pages.
 \item No type 3 fonts may be used (even in illustrations).
 \item You may not alter the spacing above and below captions, figures, headings, and subheadings.
 \item You may not alter the font sizes of text elements, footnotes, heading elements, captions, or title information (for references and mathematics, please see the limited exceptions provided herein).
 \item You may not alter the line spacing of text.
 \item Your title must follow Title Case capitalization rules (not sentence case).
 \item \LaTeX{} documents must use the Times or Nimbus font package (you may not use Computer Modern for the text of your paper).
 \item No \LaTeX{} 209 documents may be used or submitted.
 \item Your source must not require use of fonts for non-Roman alphabets within the text itself. If your paper includes symbols in other languages (such as, but not limited to, Arabic, Chinese, Hebrew, Japanese, Thai, Russian and other Cyrillic languages), you must restrict their use to bit-mapped figures. Fonts that require non-English language support (CID and Identity-H) must be converted to outlines or 300 dpi bitmap or removed from the document (even if they are in a graphics file embedded in the document).
 \item Two-column format in AAAI style is required for all papers.
 \item The paper size for final submission must be US letter without exception.
 \item The source file must exactly match the PDF.
 \item The document margins may not be exceeded (no overfull boxes).
 \item The number of pages and the file size must be as specified for your event.
 \item No document may be password protected.
 \item Neither the PDFs nor the source may contain any embedded links or bookmarks (no hyperref or navigator packages).
 \item Your source and PDF must not have any page numbers, footers, or headers (no pagestyle commands).
 \item Your PDF must be compatible with Acrobat 5 or higher.
 \item Your \LaTeX{} source file (excluding references) must consist of a \textbf{single} file (use of the ``input'' command is not allowed).
 \item Your graphics must be sized appropriately outside of \LaTeX{} (do not use the ``clip'' or ``trim'' command).
 \end{itemize}
 If you do not follow these requirements, your paper will be returned to you to correct the deficiencies.
 \fi
 \section{What Files to Submit}
 You must submit the following items to ensure that your paper is published:
 \begin{itemize}
 \item A fully-compliant PDF file.
 \item Your \LaTeX{} source file submitted as a \textbf{single} .tex file (do not use the ``input'' command to include sections of your paper --- every section must be in the single source file). (The only allowable exception is the .bib file, which should be included separately.)
 \item The bibliography (.bib) file(s).
 \item Your source must compile on our system, which includes only standard \LaTeX{} 2020 TeXLive support files.
 \item Only the graphics files used in compiling paper.
 \item The \LaTeX{}-generated files (e.g. .aux,  .bbl file, PDF, etc.).
 \end{itemize}
 Your \LaTeX{} source will be reviewed and recompiled on our system (if it does not compile, your paper will be returned to you). \textbf{Do not submit your source in multiple text files.} Your single \LaTeX{} source file must include all your text, your bibliography (formatted using aaai2026.bst), and any custom macros.
 Your files should work without any supporting files (other than the program itself) on any computer with a standard \LaTeX{} distribution.
 \textbf{Do not send files that are not actually used in the paper.} Avoid including any files not needed for compiling your paper, including, for example, this instructions file, unused graphics files, style files, additional material sent for the purpose of the paper review, intermediate build files and so forth.
 \textbf{Obsolete style files.} The commands for some common packages (such as some used for algorithms), may have changed. Please be certain that you are not compiling your paper using old or obsolete style files.
 \textbf{Final Archive.} Place your source files in a single archive which should be compressed using .zip. The final file size may not exceed 10 MB.
 Name your source file with the last (family) name of the first author, even if that is not you.
 \section{Using \LaTeX{} to Format Your Paper}
 The latest version of the AAAI style file is available on AAAI's website. Download this file and place it in the \TeX\ search path. Placing it in the same directory as the paper should also work. You must download the latest version of the complete AAAI Author Kit so that you will have the latest instruction set and style file.
 \subsection{Document Preamble}
 In the \LaTeX{} source for your paper, you \textbf{must} place the following lines as shown in the example in this subsection. This command set-up is for three authors. Add or subtract author and address lines as necessary, and uncomment the portions that apply to you. In most instances, this is all you need to do to format your paper in the Times font. The helvet package will cause Helvetica to be used for sans serif. These files are part of the PSNFSS2e package, which is freely available from many Internet sites (and is often part of a standard installation).
 Leave the setcounter for section number depth commented out and set at 0 unless you want to add section numbers to your paper. If you do add section numbers, you must uncomment this line and change the number to 1 (for section numbers), or 2 (for section and subsection numbers). The style file will not work properly with numbering of subsubsections, so do not use a number higher than 2.
 \subsubsection{The Following Must Appear in Your Preamble}
 \ifdefined\aaaianonymous
 \begin{quote}
 \begin{scriptsize}\begin{verbatim}
 \documentclass[letterpaper]{article}
 % DO NOT CHANGE THIS
 \usepackage[submission]{aaai2026} % DO NOT CHANGE THIS
 \usepackage{times} % DO NOT CHANGE THIS
 \usepackage{helvet} % DO NOT CHANGE THIS
 \usepackage{courier} % DO NOT CHANGE THIS
 \usepackage[hyphens]{url} % DO NOT CHANGE THIS
 \usepackage{graphicx} % DO NOT CHANGE THIS
 \urlstyle{rm} % DO NOT CHANGE THIS
 \def\UrlFont{\rm} % DO NOT CHANGE THIS
 \usepackage{graphicx}  % DO NOT CHANGE THIS
 \usepackage{natbib}  % DO NOT CHANGE THIS
 \usepackage{caption}  % DO NOT CHANGE THIS
 \frenchspacing % DO NOT CHANGE THIS
 \setlength{\pdfpagewidth}{8.5in} % DO NOT CHANGE THIS
 \setlength{\pdfpageheight}{11in} % DO NOT CHANGE THIS
 %
 % Keep the \pdfinfo as shown here. There's no need
 % for you to add the /Title and /Author tags.
 \pdfinfo{
 /TemplateVersion (2026.1)
 }
 \end{verbatim}\end{scriptsize}
 \end{quote}
 \else
 \begin{quote}
 \begin{scriptsize}\begin{verbatim}
 \documentclass[letterpaper]{article}
 % DO NOT CHANGE THIS
 \usepackage{aaai2026} % DO NOT CHANGE THIS
 \usepackage{times} % DO NOT CHANGE THIS
 \usepackage{helvet} % DO NOT CHANGE THIS
 \usepackage{courier} % DO NOT CHANGE THIS
 \usepackage[hyphens]{url} % DO NOT CHANGE THIS
 \usepackage{graphicx} % DO NOT CHANGE THIS
 \urlstyle{rm} % DO NOT CHANGE THIS
 \def\UrlFont{\rm} % DO NOT CHANGE THIS
 \usepackage{graphicx}  % DO NOT CHANGE THIS
 \usepackage{natbib}  % DO NOT CHANGE THIS
 \usepackage{caption}  % DO NOT CHANGE THIS
 \frenchspacing % DO NOT CHANGE THIS
 \setlength{\pdfpagewidth}{8.5in} % DO NOT CHANGE THIS
 \setlength{\pdfpageheight}{11in} % DO NOT CHANGE THIS
 %
 % Keep the \pdfinfo as shown here. There's no need
 % for you to add the /Title and /Author tags.
 \pdfinfo{
 /TemplateVersion (2026.1)
 }
 \end{verbatim}\end{scriptsize}
 \end{quote}
 \fi
 \subsection{Preparing Your Paper}
 After the preamble above, you should prepare your paper as follows:
 \begin{quote}
 \begin{scriptsize}\begin{verbatim}
 \begin{document}
 \maketitle
 \begin{abstract}
 %...
 \end{abstract}\end{verbatim}\end{scriptsize}
 \end{quote}
 \noindent If you want to add links to the paper's code, dataset(s), and extended version or similar this is the place to add them, within a \emph{links} environment:
 \begin{quote}%
 \begin{scriptsize}\begin{verbatim}
 \begin{links}
  \link{Code}{https://aaai.org/example/guidelines}
  \link{Datasets}{https://aaai.org/example/datasets}
  \link{Extended version}{https://aaai.org/example}
 \end{links}\end{verbatim}\end{scriptsize}
 \end{quote}
 \ifdefined\aaaianonymous
 \noindent Make sure that you do not de-anonymize yourself with these links.
 \fi
 \noindent You should then continue with the body of your paper. Your paper must conclude with the references, which should be inserted as follows:
 \begin{quote}
 \begin{scriptsize}\begin{verbatim}
 % References and End of Paper
 % These lines must be placed at the end of your paper
 \bibliography{Bibliography-File}
 \end{document}
 \end{verbatim}\end{scriptsize}
 \end{quote}
 \begin{quote}
 \begin{scriptsize}\begin{verbatim}
 \begin{document}
 \maketitle
 ...
 \bibliography{Bibliography-File}
 \end{document}
 \end{verbatim}\end{scriptsize}
 \end{quote}
 \subsection{Commands and Packages That May Not Be Used}
 \begin{table*}[t]
 \centering
 \begin{tabular}{l|l|l|l}
 \textbackslash abovecaption &
 \textbackslash abovedisplay &
 \textbackslash addevensidemargin &
 \textbackslash addsidemargin \\
 \textbackslash addtolength &
 \textbackslash baselinestretch &
 \textbackslash belowcaption &
 \textbackslash belowdisplay \\
 \textbackslash break &
 \textbackslash clearpage &
 \textbackslash clip &
 \textbackslash columnsep \\
 \textbackslash float &
 \textbackslash input &
 \textbackslash input &
 \textbackslash linespread \\
 \textbackslash newpage &
 \textbackslash pagebreak &
 \textbackslash renewcommand &
 \textbackslash setlength \\
 \textbackslash textheight &
 \textbackslash tiny &
 \textbackslash topmargin &
 \textbackslash trim \\
 \textbackslash vskip\{- &
 \textbackslash vspace\{- \\
 \end{tabular}
 \caption{Commands that must not be used}
 \label{table1}
 \end{table*}
 \begin{table}[t]
 \centering
 \begin{tabular}{l|l|l|l}
    authblk & babel & cjk & dvips \\
    epsf & epsfig & euler & float \\
    fullpage & geometry & graphics & hyperref \\
    layout & linespread & lmodern & maltepaper \\
    navigator & pdfcomment & pgfplots & psfig \\
    pstricks & t1enc & titlesec & tocbind \\
    ulem
 \end{tabular}
 \caption{LaTeX style packages that must not be used.}
 \label{table2}
 \end{table}
 There are a number of packages, commands, scripts, and macros that are incompatible with aaai2026.sty. The common ones are listed in tables \ref{table1} and \ref{table2}. Generally, if a command, package, script, or macro alters floats, margins, fonts, sizing, linespacing, or the presentation of the references and citations, it is unacceptable. Note that negative vskip and vspace may not be used except in certain rare occurrences, and may never be used around tables, figures, captions, sections, subsections, subsubsections, or references.
 \subsection{Page Breaks}
 For your final camera ready copy, you must not use any page break commands. References must flow directly after the text without breaks. Note that some conferences require references to be on a separate page during the review process. AAAI Press, however, does not require this condition for the final paper.
 \subsection{Paper Size, Margins, and Column Width}
 Papers must be formatted to print in two-column format on 8.5 x 11 inch US letter-sized paper. The margins must be exactly as follows:
 \begin{itemize}
 \ifdefined\aaaianonymous
 \item Top margin: 1.25 inches (first page), .75 inches (others)
 \else
 \item Top margin: .75 inches
 \fi
 \item Left margin: .75 inches
 \item Right margin: .75 inches
 \item Bottom margin: 1.25 inches
 \end{itemize}
 The default paper size in most installations of \LaTeX{} is A4. However, because we require that your electronic paper be formatted in US letter size, the preamble we have provided includes commands that alter the default to US letter size. Please note that using any other package to alter page size (such as, but not limited to the Geometry package) will result in your final paper being returned to you for correction.
 \subsubsection{Column Width and Margins.}
 To ensure maximum readability, your paper must include two columns. Each column should be 3.3 inches wide (slightly more than 3.25 inches), with a .375 inch (.952 cm) gutter of white space between the two columns. The aaai2026.sty file will automatically create these columns for you.
 \subsection{Overlength Papers}
 If your paper is too long and you resort to formatting tricks to make it fit, it is quite likely that it will be returned to you. The best way to retain readability if the paper is overlength is to cut text, figures, or tables. There are a few acceptable ways to reduce paper size that don't affect readability. First, turn on \textbackslash frenchspacing, which will reduce the space after periods. Next, move all your figures and tables to the top of the page. Consider removing less important portions of a figure. If you use \textbackslash centering instead of \textbackslash begin\{center\} in your figure environment, you can also buy some space. For mathematical environments, you may reduce fontsize {\bf but not below 6.5 point}.
 Commands that alter page layout are forbidden. These include \textbackslash columnsep,  \textbackslash float, \textbackslash topmargin, \textbackslash topskip, \textbackslash textheight, \textbackslash textwidth, \textbackslash oddsidemargin, and \textbackslash evensidemargin (this list is not exhaustive). If you alter page layout, you will be required to pay the page fee. Other commands that are questionable and may cause your paper to be rejected include \textbackslash parindent and \textbackslash parskip. Commands that alter the space between sections are forbidden. The titlesec package is not allowed. Regardless of the above, if your paper is obviously ``squeezed'' it is not going to be accepted. Options for reducing the length of a paper include reducing the size of your graphics, cutting text, or paying the extra page charge (if it is offered).
 \subsection{Type Font and Size}
 Your paper must be formatted in Times Roman or Nimbus. We will not accept papers formatted using Computer Modern or Palatino or some other font as the text or heading typeface. Sans serif, when used, should be Courier. Use Symbol or Lucida or Computer Modern for \textit{mathematics only. }
 Do not use type 3 fonts for any portion of your paper, including graphics. Type 3 bitmapped fonts are designed for fixed resolution printers. Most print at 300 dpi even if the printer resolution is 1200 dpi or higher. They also often cause high resolution imagesetter devices to crash. Consequently, AAAI will not accept electronic files containing obsolete type 3 fonts. Files containing those fonts (even in graphics) will be rejected. (Authors using blackboard symbols must avoid packages that use type 3 fonts.)
 Fortunately, there are effective workarounds that will prevent your file from embedding type 3 bitmapped fonts. The easiest workaround is to use the required times, helvet, and courier packages with \LaTeX{}2e. (Note that papers formatted in this way will still use Computer Modern for the mathematics. To make the math look good, you'll either have to use Symbol or Lucida, or you will need to install type 1 Computer Modern fonts --- for more on these fonts, see the section ``Obtaining Type 1 Computer Modern.'')
 If you are unsure if your paper contains type 3 fonts, view the PDF in Acrobat Reader. The Properties/Fonts window will display the font name, font type, and encoding properties of all the fonts in the document. If you are unsure if your graphics contain type 3 fonts (and they are PostScript or encapsulated PostScript documents), create PDF versions of them, and consult the properties window in Acrobat Reader.
 The default size for your type must be ten-point with twelve-point leading (line spacing). Start all pages (except the first) directly under the top margin. (See the next section for instructions on formatting the title page.) Indent ten points when beginning a new paragraph, unless the paragraph begins directly below a heading or subheading.
 \subsubsection{Obtaining Type 1 Computer Modern for \LaTeX{}.}
 If you use Computer Modern for the mathematics in your paper (you cannot use it for the text) you may need to download type 1 Computer Modern fonts. They are available without charge from the American Mathematical Society:
 http://www.ams.org/tex/type1-fonts.html.
 \subsubsection{Nonroman Fonts.}
 If your paper includes symbols in other languages (such as, but not limited to, Arabic, Chinese, Hebrew, Japanese, Thai, Russian and other Cyrillic languages), you must restrict their use to bit-mapped figures.
 \subsection{Title and Authors}
 Your title must appear centered over both text columns in sixteen-point bold type (twenty-four point leading). The title must follow Title Case capitalization rules (not sentence case). The rules are a bit involved, but in general verbs (including short verbs like be, is, using, and go), nouns, adverbs, adjectives, and pronouns should be capitalized (including both words in hyphenated terms), while articles, conjunctions, and prepositions are lower case unless they directly follow a colon or long dash. You can use the online tool \url{https://titlecaseconverter.com/} to double-check the proper capitalization (select the ``Chicago'' style and mark the ``Show explanations'' checkbox).
 Authors' names should appear below the title of the paper, centered in twelve-point type (with fifteen-point leading), along with affiliation(s) and complete address(es) (including electronic mail address if available) in nine-point roman type (with twelve-point leading). You should begin the two-column format when you come to the abstract.
 \subsubsection{Formatting Author Information.}
 Author information has to be set according to the following specification, depending on whether you have one or more than one affiliation. You may not use a table nor may you employ the authblk.sty package. For one or several authors from the same institution, please separate them with commas and write all affiliations directly below (one affiliation per line) using the macros \textbackslash author and \textbackslash affiliations:
 \begin{quote}\begin{scriptsize}\begin{verbatim}
 \author{
    Author 1, ..., Author n\\
 }
 \affiliations {
    Address line\\
    ... \\
    Address line\\
 }
 \end{verbatim}\end{scriptsize}\end{quote}
 \noindent For authors from different institutions, use \textbackslash textsuperscript \{\textbackslash rm x \} to match authors and affiliations. Notice that there should not be any spaces between the author name (or comma following it) and the superscript.
 \begin{quote}\begin{scriptsize}\begin{verbatim}
 \author{
    AuthorOne\equalcontrib\textsuperscript{\rm 1,\rm 2},
    AuthorTwo\equalcontrib\textsuperscript{\rm 2},
    AuthorThree\textsuperscript{\rm 3},\\
    AuthorFour\textsuperscript{\rm 4},
    AuthorFive\textsuperscript{\rm 5}
 }
 \affiliations {
    \textsuperscript{\rm 1}AffiliationOne,\\
    \textsuperscript{\rm 2}AffiliationTwo,\\
    \textsuperscript{\rm 3}AffiliationThree,\\
    \textsuperscript{\rm 4}AffiliationFour,\\
    \textsuperscript{\rm 5}AffiliationFive\\
    \{email, email\}@affiliation.com,
    email@affiliation.com,
    email@affiliation.com,
    email@affiliation.com
 }
 \end{verbatim}\end{scriptsize}\end{quote}
 You can indicate that some authors contributed equally using the \textbackslash equalcontrib command. This will add a marker after the author names and a footnote on the first page.
 Note that you may want to  break the author list for better visualization. You can achieve this using a simple line break (\textbackslash  \textbackslash).
 \subsection{\LaTeX{} Copyright Notice}
 The copyright notice automatically appears if you use aaai2026.sty. It has been hardcoded and may not be disabled.
 \subsection{Credits}
 Any credits to a sponsoring agency should appear in the acknowledgments section, unless the agency requires different placement. If it is necessary to include this information on the front page, use
 \textbackslash thanks in either the \textbackslash author or \textbackslash title commands.
 For example:
 \begin{quote}
 \begin{small}
 \textbackslash title\{Very Important Results in AI\textbackslash thanks\{This work is
 supported by everybody.\}\}
 \end{small}
 \end{quote}
 Multiple \textbackslash thanks commands can be given. Each will result in a separate footnote indication in the author or title with the corresponding text at the bottom of the first column of the document. Note that the \textbackslash thanks command is fragile. You will need to use \textbackslash protect.
 Please do not include \textbackslash pubnote commands in your document.
 \subsection{Abstract}
 Follow the example commands in this document for creation of your abstract. The command \textbackslash begin\{abstract\} will automatically indent the text block. Please do not indent it further. {Do not include references in your abstract!}
 \subsection{Page Numbers}
 Do not print any page numbers on your paper. The use of \textbackslash pagestyle is forbidden.
 \subsection{Text}
 The main body of the paper must be formatted in black, ten-point Times Roman with twelve-point leading (line spacing). You may not reduce font size or the linespacing. Commands that alter font size or line spacing (including, but not limited to baselinestretch, baselineshift, linespread, and others) are expressly forbidden. In addition, you may not use color in the text.
 \subsection{Citations}
 Citations within the text should include the author's last name and year, for example (Newell 1980). Append lower-case letters to the year in cases of ambiguity. Multiple authors should be treated as follows: (Feigenbaum and Engelmore 1988) or (Ford, Hayes, and Glymour 1992). In the case of four or more authors, list only the first author, followed by et al. (Ford et al. 1997).
 \subsection{Extracts}
 Long quotations and extracts should be indented ten points from the left and right margins.
 \begin{quote}
 This is an example of an extract or quotation. Note the indent on both sides. Quotation marks are not necessary if you offset the text in a block like this, and properly identify and cite the quotation in the text.
 \end{quote}
 \subsection{Footnotes}
 Use footnotes judiciously, taking into account that they interrupt the reading of the text. When required, they should be consecutively numbered throughout with superscript Arabic numbers. Footnotes should appear at the bottom of the page, separated from the text by a blank line space and a thin, half-point rule.
 \subsection{Headings and Sections}
 When necessary, headings should be used to separate major sections of your paper. Remember, you are writing a short paper, not a lengthy book! An overabundance of headings will tend to make your paper look more like an outline than a paper. The aaai2026.sty package will create headings for you. Do not alter their size nor their spacing above or below.
 \subsubsection{Section Numbers.}
 The use of section numbers in AAAI Press papers is optional. To use section numbers in \LaTeX{}, uncomment the setcounter line in your document preamble and change the 0 to a 1. Section numbers should not be used in short poster papers and/or extended abstracts.
 \subsubsection{Section Headings.}
 Sections should be arranged and headed as follows:
 \begin{enumerate}
 \item Main content sections
 \item Appendices (optional)
 \item Ethical Statement (optional, unnumbered)
 \item Acknowledgements (optional, unnumbered)
 \item References (unnumbered)
 \end{enumerate}
 \subsubsection{Appendices.}
 Any appendices must appear after the main content. If your main sections are numbered, appendix sections must use letters instead of arabic numerals. In \LaTeX{} you can use the \texttt{\textbackslash appendix} command to achieve this effect and then use \texttt{\textbackslash section\{Heading\}} normally for your appendix sections.
 \subsubsection{Ethical Statement.}
 You can write a statement about the potential ethical impact of your work, including its broad societal implications, both positive and negative. If included, such statement must be written in an unnumbered section titled \emph{Ethical Statement}.
 \subsubsection{Acknowledgments.}
 The acknowledgments section, if included, appears right before the references and is headed ``Acknowledgments''. It must not be numbered even if other sections are (use \texttt{\textbackslash section*\{Acknowledgments\}} in \LaTeX{}). This section includes acknowledgments of help from associates and colleagues, credits to sponsoring agencies, financial support, and permission to publish. Please acknowledge other contributors, grant support, and so forth, in this section. Do not put acknowledgments in a footnote on the first page. If your grant agency requires acknowledgment of the grant on page 1, limit the footnote to the required statement, and put the remaining acknowledgments at the back. Please try to limit acknowledgments to no more than three sentences.
 \subsubsection{References.}
 The references section should be labeled ``References'' and must appear at the very end of the paper (don't end the paper with references, and then put a figure by itself on the last page). A sample list of references is given later on in these instructions. Please use a consistent format for references. Poorly prepared or sloppy references reflect badly on the quality of your paper and your research. Please prepare complete and accurate citations.
 \subsection{Illustrations and  Figures}
 \begin{figure}[t]
 \centering
 \includegraphics[width=0.9\columnwidth]{figure1} % Reduce the figure size so that it is slightly narrower than the column. Don't use precise values for figure width.This setup will avoid overfull boxes.
 \caption{Using the trim and clip commands produces fragile layers that can result in disasters (like this one from an actual paper) when the color space is corrected or the PDF combined with others for the final proceedings. Crop your figures properly in a graphics program -- not in LaTeX.}
 \label{fig1}
 \end{figure}
 \begin{figure*}[t]
 \centering
 \includegraphics[width=0.8\textwidth]{figure2} % Reduce the figure size so that it is slightly narrower than the column.
 \caption{Adjusting the bounding box instead of actually removing the unwanted data resulted in multiple layers in this paper. It also needlessly increased the PDF size. In this case, the size of the unwanted layer doubled the paper's size, and produced the following surprising results in final production. Crop your figures properly in a graphics program. Don't just alter the bounding box.}
 \label{fig2}
 \end{figure*}
 Your paper must compile in PDF\LaTeX{}. Consequently, all your figures must be .jpg, .png, or .pdf. You may not use the .gif (the resolution is too low), .ps, or .eps file format for your figures.
 Figures, drawings, tables, and photographs should be placed throughout the paper on the page (or the subsequent page) where they are first discussed. Do not group them together at the end of the paper. If placed at the top of the paper, illustrations may run across both columns. Figures must not invade the top, bottom, or side margin areas. Figures must be inserted using the \textbackslash usepackage\{graphicx\}. Number figures sequentially, for example, figure 1, and so on. Do not use minipage to group figures.
 If you normally create your figures using pgfplots, please create the figures first, and then import them as pdfs with proper bounding boxes, as the bounding and trim boxes created by pgfplots are fragile and not valid.
 When you include your figures, you must crop them \textbf{outside} of \LaTeX{}. The command \textbackslash includegraphics*[clip=true, viewport=0 0 10 10]\{...\} might result in a PDF that looks great, but the image is \textbf{not really cropped.} The full image can reappear (and obscure whatever it is overlapping) when page numbers are applied or color space is standardized. Figures \ref{fig1} and \ref{fig2} display some unwanted results that often occur.
 If your paper includes illustrations that are not compatible with PDF\TeX{} (such as .eps or .ps documents), you will need to convert them. The epstopdf package will usually work for eps files. You will need to convert your ps files to PDF in either case.
 \subsubsection {Figure Captions.}The illustration number and caption must appear \textit{under} the illustration. Labels and other text with the actual illustration must be at least nine-point type. However, the font and size of figure captions must be 10 point roman. Do not make them smaller, bold, or italic. (Individual words may be italicized if the context requires differentiation.)
 \subsection{Tables}
 Tables should be presented in 10 point roman type. If necessary, they may be altered to 9 point type. You must not use \texttt{\textbackslash resizebox} or other commands that resize the entire table to make it smaller, because you can't control the final font size this way.
 If your table is too large you can use \texttt{\textbackslash setlength\{\textbackslash tabcolsep\}\{1mm\}} to compress the columns a bit or you can adapt the content (e.g.: reduce the decimal precision when presenting numbers, use shortened column titles, make some column double-line to get it narrower).
 Tables that do not fit in a single column must be placed across double columns. If your table won't fit within the margins even when spanning both columns and using the above techniques, you must split it in two separate tables.
 \subsubsection {Table Captions.} The number and caption for your table must appear \textit{under} (not above) the table.  Additionally, the font and size of table captions must be 10 point roman and must be placed beneath the table. Do not make them smaller, bold, or italic. (Individual words may be italicized if the context requires differentiation.)
 \subsubsection{Low-Resolution Bitmaps.}
 You may not use low-resolution (such as 72 dpi) screen-dumps and GIF files---these files contain so few pixels that they are always blurry, and illegible when printed. If they are color, they will become an indecipherable mess when converted to black and white. This is always the case with gif files, which should never be used. The resolution of screen dumps can be increased by reducing the print size of the original file while retaining the same number of pixels. You can also enlarge files by manipulating them in software such as PhotoShop. Your figures should be 300 dpi when incorporated into your document.
 \subsubsection{\LaTeX{} Overflow.}
 \LaTeX{} users please beware: \LaTeX{} will sometimes put portions of the figure or table or an equation in the margin. If this happens, you need to make the figure or table span both columns. If absolutely necessary, you may reduce the figure, or reformat the equation, or reconfigure the table.{ \bf Check your log file!} You must fix any overflow into the margin (that means no overfull boxes in \LaTeX{}). \textbf{Nothing is permitted to intrude into the margin or gutter.}
 \subsubsection{Using Color.}
 Use of color is restricted to figures only. It must be WCAG 2.0 compliant. (That is, the contrast ratio must be greater than 4.5:1 no matter the font size.) It must be CMYK, NOT RGB. It may never be used for any portion of the text of your paper. The archival version of your paper will be printed in black and white and grayscale. The web version must be readable by persons with disabilities. Consequently, because conversion to grayscale can cause undesirable effects (red changes to black, yellow can disappear, and so forth), we strongly suggest you avoid placing color figures in your document. If you do include color figures, you must (1) use the CMYK (not RGB) colorspace and (2) be mindful of readers who may happen to have trouble distinguishing colors. Your paper must be decipherable without using color for distinction.
 \subsubsection{Drawings.}
 We suggest you use computer drawing software (such as Adobe Illustrator or, if unavoidable, the drawing tools in Microsoft Word) to create your illustrations. Do not use Microsoft Publisher. These illustrations will look best if all line widths are uniform (half- to two-point in size), and you do not create labels over shaded areas. Shading should be 133 lines per inch if possible. Use Times Roman or Helvetica for all figure call-outs. \textbf{Do not use hairline width lines} --- be sure that the stroke width of all lines is at least .5 pt. Zero point lines will print on a laser printer, but will completely disappear on the high-resolution devices used by our printers.
 \subsubsection{Photographs and Images.}
 Photographs and other images should be in grayscale (color photographs will not reproduce well; for example, red tones will reproduce as black, yellow may turn to white, and so forth) and set to a minimum of 300 dpi. Do not prescreen images.
 \subsubsection{Resizing Graphics.}
 Resize your graphics \textbf{before} you include them with LaTeX. You may \textbf{not} use trim or clip options as part of your \textbackslash includegraphics command. Resize the media box of your PDF using a graphics program instead.
 \subsubsection{Fonts in Your Illustrations.}
 You must embed all fonts in your graphics before including them in your LaTeX document.
 \subsubsection{Algorithms.}
 Algorithms and/or programs are a special kind of figure. Like all illustrations, they should appear floated to the top (preferably) or bottom of the page. However, their caption should appear in the header, left-justified and enclosed between horizontal lines, as shown in Algorithm~\ref{alg:algorithm}. The algorithm body should be terminated with another horizontal line. It is up to the authors to decide whether to show line numbers or not, how to format comments, etc.
 In \LaTeX{} algorithms may be typeset using the {\tt algorithm} and {\tt algorithmic} packages, but you can also use one of the many other packages for the task.
 \begin{algorithm}[tb]
 \caption{Example algorithm}
 \label{alg:algorithm}
 \textbf{Input}: Your algorithm's input\\
 \textbf{Parameter}: Optional list of parameters\\
 \textbf{Output}: Your algorithm's output
 \begin{algorithmic}[1] %[1] enables line numbers
 \STATE Let $t=0$.
 \WHILE{condition}
 \STATE Do some action.
 \IF {conditional}
 \STATE Perform task A.
 \ELSE
 \STATE Perform task B.
 \ENDIF
 \ENDWHILE
 \STATE \textbf{return} solution
 \end{algorithmic}
 \end{algorithm}
 \subsubsection{Listings.}
 Listings are much like algorithms and programs. They should also appear floated to the top (preferably) or bottom of the page. Listing captions should appear in the header, left-justified and enclosed between horizontal lines as shown in Listing~\ref{lst:listing}. Terminate the body with another horizontal line and avoid any background color. Line numbers, if included, must appear within the text column.
 \begin{listing}[tb]%
 \caption{Example listing {\tt quicksort.hs}}%
 \label{lst:listing}%
 \begin{lstlisting}[language=Haskell]
 quicksort :: Ord a => [a] -> [a]
 quicksort []     = []
 quicksort (p:xs) = (quicksort lesser) ++ [p] ++ (quicksort greater)
 	where
 		lesser  = filter (< p) xs
 		greater = filter (>= p) xs
 \end{lstlisting}
 \end{listing}
 \subsection{References}
 The AAAI style includes a set of definitions for use in formatting references with BibTeX. These definitions make the bibliography style fairly close to the ones specified in the Reference Examples appendix below. To use these definitions, you also need the BibTeX style file ``aaai2026.bst,'' available in the AAAI Author Kit on the AAAI web site. Then, at the end of your paper but before \textbackslash end\{document\}, you need to put the following lines:
 \begin{quote}
 \begin{small}
 \textbackslash bibliography\{bibfile1,bibfile2,...\}
 \end{small}
 \end{quote}
 Please note that the aaai2026.sty class already sets the bibliographystyle for you, so you do not have to place any \textbackslash bibliographystyle command in the document yourselves. The aaai2026.sty file is incompatible with the hyperref and navigator packages. If you use either, your references will be garbled and your paper will be returned to you.
 References may be the same size as surrounding text.
 However, in this section (only), you may reduce the size to {\em \textbackslash small} (9pt) if your paper exceeds the allowable number of pages. Making it any smaller than 9 point with 10 point linespacing, however, is not allowed.
 The list of files in the \textbackslash bibliography command should be the names of your BibTeX source files (that is, the .bib files referenced in your paper).
 The following commands are available for your use in citing references:
 \begin{quote}
 {\em \textbackslash cite:} Cites the given reference(s) with a full citation. This appears as ``(Author Year)'' for one reference, or ``(Author Year; Author Year)'' for multiple references.\smallskip\\
 {\em \textbackslash shortcite:} Cites the given reference(s) with just the year. This appears as ``(Year)'' for one reference, or ``(Year; Year)'' for multiple references.\smallskip\\
 {\em \textbackslash citeauthor:} Cites the given reference(s) with just the author name(s) and no parentheses.\smallskip\\
 {\em \textbackslash citeyear:} Cites the given reference(s) with just the date(s) and no parentheses.
 \end{quote}
 You may also use any of the \emph{natbib} citation commands.
 \section{Proofreading Your PDF}
 Please check all the pages of your PDF file. The most commonly forgotten element is the acknowledgements --- especially the correct grant number. Authors also commonly forget to add the metadata to the source, use the wrong reference style file, or don't follow the capitalization rules or comma placement for their author-title information properly. A final common problem is text (especially equations) that runs into the margin. You will need to fix these common errors before submitting your file.
 \section{Improperly Formatted Files }
 In the past, AAAI has corrected improperly formatted files submitted by the authors. Unfortunately, this has become an increasingly burdensome expense that we can no longer absorb. Consequently, if your file is improperly formatted, it will be returned to you for correction.
 \section{Naming Your Electronic File}
 We require that you name your \LaTeX{} source file with the last name (family name) of the first author so that it can easily be differentiated from other submissions. Complete file-naming instructions will be provided to you in the submission instructions.
 \section{Submitting Your Electronic Files to AAAI}
 Instructions on paper submittal will be provided to you in your acceptance letter.
 \section{Inquiries}
 If you have any questions about the preparation or submission of your paper as instructed in this document, please contact AAAI Press at the address given below. If you have technical questions about implementation of the aaai style file, please contact an expert at your site. We do not provide technical support for \LaTeX{} or any other software package. To avoid problems, please keep your paper simple, and do not incorporate complicated macros and style files.
 \begin{quote}
 \noindent AAAI Press\\
 1101 Pennsylvania Ave, NW Suite 300\\
 Washington, DC 20004 USA\\
 \textit{Telephone:} 1-202-360-4062\\
 \textit{E-mail:} See the submission instructions for your particular conference or event.
 \end{quote}
 \section{Additional Resources}
 \LaTeX{} is a difficult program to master. If you've used that software, and this document didn't help or some items were not explained clearly, we recommend you read Michael Shell's excellent document (testflow doc.txt V1.0a 2002/08/13) about obtaining correct PS/PDF output on \LaTeX{} systems. (It was written for another purpose, but it has general application as well). It is available at www.ctan.org in the tex-archive.
 \appendix
 \section{Reference Examples}
 \label{sec:reference_examples}
 \nobibliography*
 Formatted bibliographies should look like the following examples. You should use BibTeX to generate the references. Missing fields are unacceptable when compiling references, and usually indicate that you are using the wrong type of entry (BibTeX class).
 \paragraph{Book with multiple authors~\nocite{em:86}} Use the \texttt{@book} class.\\[.2em]
 \bibentry{em:86}.
 \paragraph{Journal and magazine articles~\nocite{r:80, hcr:83}} Use the \texttt{@article} class.\\[.2em]
 \bibentry{r:80}.\\[.2em]
 \bibentry{hcr:83}.
 \paragraph{Proceedings paper published by a society, press or publisher~\nocite{c:83, c:84}} Use the \texttt{@inproceedings} class. You may abbreviate the \emph{booktitle} field, but make sure that the conference edition is clear.\\[.2em]
 \bibentry{c:84}.\\[.2em]
 \bibentry{c:83}.
 \paragraph{University technical report~\nocite{r:86}} Use the \texttt{@techreport} class.\\[.2em]
 \bibentry{r:86}.
 \paragraph{Dissertation or thesis~\nocite{c:79}} Use the \texttt{@phdthesis} class.\\[.2em]
 \bibentry{c:79}.
 \paragraph{Forthcoming publication~\nocite{c:21}} Use the \texttt{@misc} class with a \texttt{note="Forthcoming"} annotation.
 \begin{quote}
 \begin{footnotesize}
 \begin{verbatim}
@misc(key,
  [...]
  note="Forthcoming",
 )
 \end{verbatim}
 \end{footnotesize}
 \end{quote}
 \bibentry{c:21}.
 \paragraph{ArXiv paper~\nocite{c:22}} Fetch the BibTeX entry from the "Export Bibtex Citation" link in the arXiv website. Notice it uses the \texttt{@misc} class instead of the \texttt{@article} one, and that it includes the \texttt{eprint} and \texttt{archivePrefix} keys.
 \begin{quote}
 \begin{footnotesize}
 \begin{verbatim}
@misc(key,
  [...]
  eprint="xxxx.yyyy",
  archivePrefix="arXiv",
 )
 \end{verbatim}
 \end{footnotesize}
 \end{quote}
 \bibentry{c:22}.
 \paragraph{Website or online resource~\nocite{c:23}} Use the \texttt{@misc} class. Add the url in the \texttt{howpublished} field and the date of access in the \texttt{note} field:
 \begin{quote}
 \begin{footnotesize}
 \begin{verbatim}
@misc(key,
  [...]
  howpublished="\url{http://...}",
  note="Accessed: YYYY-mm-dd",
 )
 \end{verbatim}
 \end{footnotesize}
 \end{quote}
 \bibentry{c:23}.
 \vspace{.2em}
 For the most up to date version of the AAAI reference style, please consult the \textit{AI Magazine} Author Guidelines at \url{https://aaai.org/ojs/index.php/aimagazine/about/submissions#authorGuidelines}
 \section{Acknowledgments}
 % Anonymous submission version - shorter acknowledgments
 AAAI is especially grateful to Peter Patel Schneider for his work in implementing the aaai2026.sty file, liberally using the ideas of other style hackers, including Barbara Beeton. We also acknowledge with thanks the work of George Ferguson for his guide to using the style and BibTeX files --- which has been incorporated into this document --- and Hans Guesgen, who provided several timely modifications, as well as the many others who have, from time to time, sent in suggestions on improvements to the AAAI style. We are especially grateful to Francisco Cruz, Marc Pujol-Gonzalez, and Mico Loretan for the improvements to the Bib\TeX{} and \LaTeX{} files made in 2020.
 The preparation of the \LaTeX{} and Bib\TeX{} files that implement these instructions was supported by Schlumberger Palo Alto Research, AT\&T Bell Laboratories, Morgan Kaufmann Publishers, The Live Oak Press, LLC, and AAAI Press. Bibliography style changes were added by Sunil Issar. \verb+\+pubnote was added by J. Scott Penberthy. George Ferguson added support for printing the AAAI copyright slug. Additional changes to aaai2026.sty and aaai2026.bst have been made by Francisco Cruz and Marc Pujol-Gonzalez.
 \bigskip
 \noindent Thank you for reading these instructions carefully. We look forward to receiving your electronic files!
 % Note: \bibliographystyle{aaai2026} is automatically set by aaai2026.sty
 % Do not add \bibliographystyle{aaai2026} here as it will cause "Illegal, another \bibstyle command" error
 \bibliography{aaai2026}
 \section{Reproducibility Checklist}
 Unless specified otherwise, please answer ``yes'' to each question if the relevant information is described either in the paper itself or in a technical appendix with an explicit reference from the main paper. If you wish to explain an answer further, please do so in a section titled ``Reproducibility Checklist'' at the end of the technical appendix.
 This paper:
 Includes a conceptual outline and/or pseudocode description of AI methods introduced (yes/partial/no/NA)
 Clearly delineates statements that are opinions, hypotheses, and speculation from objective facts and results (yes/no)
 Provides well marked pedagogical references for less-familiar readers to gain background necessary to replicate the paper (yes/no)
 Does this paper make theoretical contributions? (yes/no)
 If yes, please complete the list below.
 All assumptions and restrictions are stated clearly and formally. (yes/partial/no)
 All novel claims are stated formally (e.g., in theorem statements). (yes/partial/no)
 Proofs of all novel claims are included. (yes/partial/no)
 Proof sketches or intuitions are given for complex and/or novel results. (yes/partial/no)
 Appropriate citations to theoretical tools used are given. (yes/partial/no)
 All theoretical claims are demonstrated empirically to hold. (yes/partial/no/NA)
 All experimental code used to eliminate or disprove claims is included. (yes/no/NA)
 Does this paper rely on one or more datasets? (yes/no)
 If yes, please complete the list below.
 A motivation is given for why the experiments are conducted on the selected datasets (yes/partial/no/NA)
 All novel datasets introduced in this paper are included in a data appendix. (yes/partial/no/NA)
 All novel datasets introduced in this paper will be made publicly available upon publication of the paper with a license that allows free usage for research purposes. (yes/partial/no/NA)
 All datasets drawn from the existing literature (potentially including authors' own previously published work) are accompanied by appropriate citations. (yes/no/NA)
 All datasets drawn from the existing literature (potentially including authors' own previously published work) are publicly available. (yes/partial/no/NA)
 All datasets that are not publicly available are described in detail, with explanation why publicly available alternatives are not scientifically satisficing. (yes/partial/no/NA)
 Does this paper include computational experiments? (yes/no)
 If yes, please complete the list below.
 This paper states the number and range of values tried per (hyper-) parameter during development of the paper, along with the criterion used for selecting the final parameter setting. (yes/partial/no/NA)
 Any code required for pre-processing data is included in the appendix. (yes/partial/no).
 All source code required for conducting and analyzing the experiments is included in a code appendix. (yes/partial/no)
 All source code required for conducting and analyzing the experiments will be made publicly available upon publication of the paper with a license that allows free usage for research purposes. (yes/partial/no)
 All source code implementing new methods has comments detailing the implementation, with references to the paper where each step comes from (yes/partial/no)
 If an algorithm depends on randomness, then the method used for setting seeds is described in a way sufficient to allow replication of results. (yes/partial/no/NA)
 This paper specifies the computing infrastructure used for running experiments (hardware and software), including GPU/CPU models; amount of memory; operating system; names and versions of relevant software libraries and frameworks. (yes/partial/no)
 This paper formally describes evaluation metrics used and explains the motivation for choosing these metrics. (yes/partial/no)
 This paper states the number of algorithm runs used to compute each reported result. (yes/no)
 Analysis of experiments goes beyond single-dimensional summaries of performance (e.g., average; median) to include measures of variation, confidence, or other distributional information. (yes/no)
 The significance of any improvement or decrease in performance is judged using appropriate statistical tests (e.g., Wilcoxon signed-rank). (yes/partial/no)
 This paper lists all final (hyper-)parameters used for each model/algorithm in the paper's experiments. (yes/partial/no/NA).
 \end{document} 
--- a/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026.bib
+++ b/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026.bib
@ -0,0 +1,111 @@
@book{em:86,
  editor  = "Engelmore, Robert and Morgan, Anthony",
  title   = "Blackboard Systems",
  year    = 1986,
  address = "Reading, Mass.",
  publisher = "Addison-Wesley",
 }
@inproceedings{c:83,
  author  = "Clancey, William J.",
  year    = 1983,
  title   = "{Communication, Simulation, and Intelligent
 Agents: Implications of Personal Intelligent Machines
 for Medical Education}",
  booktitle="Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}", 
  pages   = "556-560",
  address = "Menlo Park, Calif.",
  publisher = "{IJCAI Organization}",
 }
@inproceedings{c:84,
  author  = "Clancey, William J.",
  year    = 1984,
  title   = "{Classification Problem Solving}",
  booktitle = "Proceedings of the Fourth National 
              Conference on Artificial Intelligence",
  pages   = "45-54",
  address = "Menlo Park, Calif.",
  publisher="AAAI Press",
 }
@article{r:80,
  author = {Robinson, Arthur L.},
  title = {New Ways to Make Microcircuits Smaller},
  volume = {208},
  number = {4447},
  pages = {1019--1022},
  year = {1980},
  doi = {10.1126/science.208.4447.1019},
  publisher = {American Association for the Advancement of Science},
  issn = {0036-8075},
  URL = {https://science.sciencemag.org/content/208/4447/1019},
  eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf},
  journal = {Science},
 }
@article{r:80x,
  author  = "Robinson, Arthur L.",
  year    = 1980,
  title   = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}",
  journal = "Science",
  volume  =  208,
  pages   = "1019-1026",
 }
@article{hcr:83,
 title = {Strategic explanations for a diagnostic consultation system},
 journal = {International Journal of Man-Machine Studies},
 volume = {20},
 number = {1},
 pages = {3-19},
 year = {1984},
 issn = {0020-7373},
 doi = {https://doi.org/10.1016/S0020-7373(84)80003-6},
 url = {https://www.sciencedirect.com/science/article/pii/S0020737384800036},
 author = {Diane Warner Hasling and William J. Clancey and Glenn Rennels},
 abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy. We describe the aspects of NEOMYCIN that make abstract strategic explanations possible—the representation of strategic knowledge explicitly and separately from domain knowledge— and demonstrate how this representation can be used to generate explanations.}
 }
@article{hcrt:83,
  author  = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas",
  year    = 1983,
  title   = "{Strategic Explanations in Consultation---Duplicate}",
  journal = "The International Journal of Man-Machine Studies",
  volume  = 20,
  number  = 1,
  pages   = "3-19",
 }
@techreport{r:86,
  author  = "Rice, James",
  year    = 1986,
  title   = "{Poligon: A System for Parallel Problem Solving}",
  type    = "Technical Report", 
  number  = "KSL-86-19", 
  institution = "Dept.\ of Computer Science, Stanford Univ.",
 }
@phdthesis{c:79,
  author  = "Clancey, William J.",
  year    = 1979,
  title   = "{Transfer of Rule-Based Expertise
 through a Tutorial Dialogue}",
  type    = "{Ph.D.} diss.",
  school  = "Dept.\ of Computer Science, Stanford Univ.",
  address = "Stanford, Calif.",
 }
@unpublished{c:21,
  author  = "Clancey, William J.",
  title   = "{The Engineering of Qualitative Models}",
  year    = 2021,
  note    = "Forthcoming",
 }
@misc{c:22,
      title={Attention Is All You Need}, 
      author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
      year={2017},
      eprint={1706.03762},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
 }
@misc{c:23,
  title        = "Pluto: The 'Other' Red Planet",
  author       = "{NASA}",
  howpublished = "\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}",
  year         = 2015,
  note         = "Accessed: 2018-12-06"
 }
--- a/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026.bst
+++ b/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026.bst
--- a/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026.sty
+++ b/skills/mlops/ml-paper-writing/templates/aaai2026/aaai2026.sty
@ -0,0 +1,315 @@
 % Package identification and option handling for the AAAI 2026 style.
 \NeedsTeXFormat{LaTeX2e}%
 \ProvidesPackage{aaai2026}[2026/04/29 AAAI 2026 Submission format]%
 % \year is redefined as a macro holding the conference year; \copyright@year
 % (defined further down as \number\year) therefore yields 2026.
 \def\year{2026}%
 \typeout{Conference Style for AAAI for LaTeX 2e -- version for submission}%
 %
 % Feature flags. The value "T" means enabled and an empty value means
 % disabled; they are tested with "\if T\flag" in \maketitle / \@maketitle.
 \def\copyright@on{T}
 \def\showauthors@on{T}
 \def\nocopyright{\gdef\copyright@on{}} % Copyright notice is required for camera-ready only.
 % [submission]: no copyright slug, no author display, and \pdfinfo becomes
 % a macro that discards its argument.
 \DeclareOption{submission}{%
  \gdef\copyright@on{}%
  \gdef\showauthors@on{}%
  \long\gdef\pdfinfo #1{\relax}%
 }%
 % [draft]: only the copyright slug is switched off.
 \DeclareOption{draft}{%
  \gdef\copyright@on{}%
 }%
 \ProcessOptions\relax%
 % WARNING: IF YOU ARE USING THIS STYLE SHEET FOR AN AAAI PUBLICATION, YOU
 % MAY NOT MODIFY IT FOR ANY REASON. MODIFICATIONS (IN YOUR SOURCE
 % OR IN THIS STYLE SHEET WILL RESULT IN REJECTION OF YOUR PAPER).
 %
 % WARNING: This style is NOT guaranteed to work. It is provided in the
 % hope that it might make the preparation of papers easier, but this style
 % file is provided "as is" without warranty of any kind, either express or
 % implied, including but not limited to the implied warranties of
 % merchantability, fitness for a particular purpose, or noninfringement.
 % You use this style file at your own risk. Standard disclaimers apply.
 % There are undoubtably bugs in this style. If you would like to submit
 % bug fixes, improvements, etc. please let us know. Please use the contact form
 % at www.aaai.org.
 %
 % Do not use this file unless you are an experienced LaTeX user.
 %
 % PHYSICAL PAGE LAYOUT
 % Margins and text block: 7.0in x 9.0in text area, two columns separated
 % by 0.375in.
 \setlength\topmargin{-0.25in} \setlength\oddsidemargin{-0.25in}
 \setlength\textheight{9.0in} \setlength\textwidth{7.0in}
 % \titlebox is the height reserved for the title block; \@maketitle
 % enlarges it when the typeset title is taller than this initial value.
 \setlength\columnsep{0.375in} \newlength\titlebox \setlength\titlebox{2.25in}
 % No running head: zero head height and separation.
 \setlength\headheight{0pt}  \setlength\headsep{0pt}
 %\setlength\footheight{0pt}  \setlength\footskip{0pt}
 % Empty page style on every page (no page numbers).
 \thispagestyle{empty} \pagestyle{empty}
 \flushbottom \twocolumn \sloppy
 % We're never going to need a table of contents, so just flush it to
 % save space --- suggested by drstrip@sandia-2
 % \addcontentsline is replaced by a macro that discards its three arguments.
 \def\addcontentsline#1#2#3{}
 % gf: PRINT COPYRIGHT NOTICE
 % Year printed in the copyright slug; defaults to the package's \year.
 \def\copyright@year{\number\year}
 % Default text of the copyright slug inserted by \maketitle.
 \def\copyright@text{Copyright \copyright\space \copyright@year,
 Association for the Advancement of Artificial Intelligence (www.aaai.org).
 All rights reserved.}
 % User commands: \copyrighttext{<text>} and \copyrightyear{<year>} override
 % the slug text / year. Both also (re-)enable the slug by setting
 % \copyright@on to "T".
 \def\copyrighttext#1{\gdef\copyright@on{T}\gdef\copyright@text{#1}}
 \def\copyrightyear#1{\gdef\copyright@on{T}\gdef\copyright@year{#1}}
 % gf: End changes for copyright notice (used in \maketitle, below)
 % Title stuff, taken from deproc.
 %
 % \maketitle: typesets the title block across both columns, emits the
 % \thanks footnotes, optionally inserts the copyright slug, and then
 % disables itself so that it can only be used once.
 \def\maketitle{%
  \par%
  \begingroup % to make the footnote style local to the title
    % Title footnotes use symbols instead of numbers.
    \def\thefootnote{\fnsymbol{footnote}}
    % The optional argument of \twocolumn places the title block above
    % the two columns.
    \twocolumn[\@maketitle] \@thanks%
  \endgroup%
  % Insert copyright slug unless turned off
  % (\copyright@on is "T" when enabled; the slug goes straight into the
  % footnote insert, so it carries no footnote mark).
  \if T\copyright@on\insert\footins{\noindent\footnotesize\copyright@text}\fi%
  %
  % Reset footnote numbering for the body and clear the one-shot title data.
  \setcounter{footnote}{0}%
  \let\maketitle\relax%
  \let\@maketitle\relax%
  \gdef\@thanks{}%
  \gdef\@author{}%
  \gdef\@title{}%
  \let\thanks\relax%
 }%
 % \affiliations{<text>}: stores the affiliation block for later use in
 % \@maketitle, where it is printed only when \showauthors@on is "T".
 % NOTE(review): with default catcodes "_" is not a letter, so
 % "\affiliations_" reads as the control word \affiliations followed by "_".
 % The inner \def therefore appears to redefine \affiliations itself with
 % "_" as a delimiter, which is what "\affiliations_" in \@maketitle
 % expands. Confirm before renaming or reformatting this line.
 \long\gdef\affiliations #1{ \def \affiliations_{\if T\showauthors@on#1\fi}}%
 %
 % \@maketitle: builds the title block (title, authors, affiliations).
 % It works in two passes: the block is first typeset into a save box to
 % measure its height, \titlebox is enlarged if the block is taller, and
 % then the block is typeset for real in a \vbox of height \titlebox.
 \def\@maketitle{%
  % Author line: the real authors, or a placeholder when author display is
  % switched off (e.g. by the "submission" option).
  \def\theauthors{\if T\showauthors@on\@author\else Anonymous submission\fi}
  % eqfn remembers the footnote number of the "contributed equally" note so
  % that later \equalcontrib calls reuse the same mark.
  \newcounter{eqfn}\setcounter{eqfn}{0}%
  \newsavebox{\titlearea}
  % Pass 1 (measurement only): \footnote and \thanks are set to \relax
  % inside the box.
  \sbox{\titlearea}{
    \let\footnote\relax\let\thanks\relax%
    \setcounter{footnote}{0}%
    \def\equalcontrib{%
      \ifnum\value{eqfn}=0%
        \footnote{These authors contributed equally.}%
        \setcounter{eqfn}{\value{footnote}}%
      \else%
        \footnotemark[\value{eqfn}]%
      \fi%
    }%
    \vbox{%
      \hsize\textwidth%
      \linewidth\hsize%
      \vskip 0.625in minus 0.125in%
      \centering%
      {\LARGE\bf \@title \par}%
      \vskip 0.1in plus 0.5fil minus 0.05in%
      {\Large{\textbf{\theauthors\ifhmode\\\fi}}}%
      \vskip .2em plus 0.25fil%
      {\normalsize \affiliations_\ifhmode\\\fi}%
      \vskip 1em plus 2fil%
    }%
  }%
 %
  % Grow \titlebox when the measured title block does not fit.
  \newlength\actualheight%
  \settoheight{\actualheight}{\usebox{\titlearea}}%
  \ifdim\actualheight>\titlebox%
    \setlength{\titlebox}{\actualheight}%
  \fi%
 %
  % Pass 2 (real output): same layout as above, but \footnote is mapped to
  % \thanks so the notes are collected and emitted by \maketitle.
  \vbox to \titlebox {%
    \let\footnote\thanks\relax%
    \setcounter{footnote}{0}%
    \def\equalcontrib{%
      \ifnum\value{eqfn}=0%
        \footnote{These authors contributed equally.}%
        \setcounter{eqfn}{\value{footnote}}%
      \else%
        \footnotemark[\value{eqfn}]%
      \fi%
    }%
    \hsize\textwidth%
    \linewidth\hsize%
    \vskip 0.625in minus 0.125in%
    \centering%
    {\LARGE\bf \@title \par}%
    \vskip 0.1in plus 0.5fil minus 0.05in%
    {\Large{\textbf{\theauthors\ifhmode\\\fi}}}%
    \vskip .2em plus 0.25fil%
    {\normalsize \affiliations_\ifhmode\\\fi}%
    \vskip 1em plus 2fil%
  }%
 }%
 %
 % abstract environment: a centred bold "Abstract" heading followed by the
 % text set in \small inside a quote environment, with the first-level
 % list margin set to 10pt.
 \renewenvironment{abstract}{%
  \centerline{\bf Abstract}%
  \vspace{0.5ex}%
  \setlength{\leftmargini}{10pt}%
  \begin{quote}%
    \small%
 }{%
  \par%
  \end{quote}%
  \vskip 1ex%
 }%
 % links environment: a flush-left list of resource links. Inside it,
 % \link{<label>}{<url>} starts a new paragraph and prints
 % "<label> --- <url>" with the label in bold and the URL set via \url.
 % \url is not defined in this file; it must be provided elsewhere.
 \newenvironment{links}{%
  \newcommand{\link}[2]{\par\textbf{##1} --- \url{##2}}%
  \setlength{\hangindent}{10pt}%
  \setlength{\parskip}{2pt}%
  \begin{flushleft}%
 }{%
  \end{flushleft}%
  \vskip 1ex%
 }%
 % jsp added:
 % \pubnote{<text>}: switches the current and following pages to the
 % "myheadings" page style, uses <text> as both the left and right mark,
 % and gives the header 10pt of height and 10pt of separation
 % (both are 0pt by default in this style).
 \def\pubnote#1{
  \thispagestyle{myheadings}%
  \pagestyle{myheadings}%
  \markboth{#1}{#1}%
  \setlength\headheight{10pt}%
  \setlength\headsep{10pt}%
 }%
 %
 % SECTIONS with less space
 % Sectioning commands, all built on \@startsection with the arguments
 % {name}{level}{indent}{beforeskip}{afterskip}{style}.
 % \section and \subsection have a positive afterskip (heading on its own
 % line); \subsubsection and \paragraph have a negative afterskip (-1em),
 % which makes them run-in headings.
 % NOTE(review): \subsubsection passes "subparagraph" as its name
 % argument rather than "subsubsection" -- confirm this is intended.
 \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
 -0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\Large\bf\centering}}
 \def\subsection{\@startsection{subsection}{2}{\z@}{-2.0ex plus
 -0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\large\bf\raggedright}}
 \def\subsubsection{\@startsection{subparagraph}{3}{\z@}{-6pt plus
 %%% DIEGO changed: 29/11/2009
 %% 2pt minus 1pt}{-1em}{\normalsize\bf}}
 -2pt minus -1pt}{-1em}{\normalsize\bf}}
 %%% END changed
 \renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-6pt plus -2pt minus -1pt}{-1em}{\normalsize\bf}}%
 % secnumdepth 0: section headings are not numbered.
 \setcounter{secnumdepth}{0}
 % add period to section (but not subsection) numbers, reduce space after
 %\renewcommand{\thesection}
 %   {\arabic{section}.\hskip-0.6em}
 %\renewcommand{\thesubsection}
 %   {\arabic{section}.\arabic{subsection}\hskip-0.6em}
 % FOOTNOTES
 % Footnote geometry: strut height per footnote and the glue between the
 % text body and the footnote block.
 \footnotesep 6.65pt %
 \skip\footins 9pt plus 4pt minus 2pt
 % Footnote rule 5pc wide; the two kerns add up to the negative of the
 % default 0.4pt rule thickness, so the rule takes no net vertical space.
 \def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
 \setcounter{footnote}{0}
 % LISTS AND PARAGRAPHS
 % Paragraph indent and default (level-1) list spacing.
 \parindent 10pt
 \topsep 4pt plus 1pt minus 2pt
 \partopsep 1pt plus 0.5pt minus 0.5pt
 \itemsep 0.5pt plus 1pt minus 0.5pt
 \parsep 2pt plus 1pt minus 0.5pt
 % Left margins for list nesting levels i..vi.
 \leftmargin 10pt \leftmargini 13pt \leftmarginii 10pt \leftmarginiii 5pt \leftmarginiv 5pt \leftmarginv 5pt \leftmarginvi 5pt
 % NOTE(review): \labelwidth is computed from \labelsep before \labelsep is
 % set to 5pt on this same line, so it uses whatever value \labelsep had at
 % that point -- confirm this ordering is intended.
 \labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
 % Per-level list setup hooks: each sets \leftmargin for its level; levels
 % ii and deeper also recompute \labelwidth, and levels ii/iii tighten the
 % vertical spacing.
 \def\@listi{\leftmargin\leftmargini}
 \def\@listii{\leftmargin\leftmarginii
 \labelwidth\leftmarginii\advance\labelwidth-\labelsep
 \topsep 2pt plus 1pt minus 0.5pt
 \parsep 1pt plus 0.5pt minus 0.5pt
 \itemsep \parsep}
 \def\@listiii{\leftmargin\leftmarginiii
 \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
 \topsep 1pt plus 0.5pt minus 0.5pt
 \parsep \z@
 \partopsep 0.5pt plus 0pt minus 0.5pt
 \itemsep \topsep}
 \def\@listiv{\leftmargin\leftmarginiv
 \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
 \def\@listv{\leftmargin\leftmarginv
 \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
 \def\@listvi{\leftmargin\leftmarginvi
 \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
 % Vertical space around displayed equations.
 \abovedisplayskip 7pt plus2pt minus5pt%
 \belowdisplayskip \abovedisplayskip
 \abovedisplayshortskip 0pt plus3pt%
 \belowdisplayshortskip 4pt plus3pt minus3pt%
 % Less leading in most fonts (due to the narrow columns)
 % The choices were between 1-pt and 1.5-pt leading
 % Font-size commands. Each call is \@setfontsize<cmd><size>{<baselineskip>}:
 % the second argument is the font size macro (\@xpt = 10pt, ...) and the
 % third is the baseline skip in points.
 \def\normalsize{\@setfontsize\normalsize\@xpt{11}}   % 10 point on 11
 \def\small{\@setfontsize\small\@ixpt{10}}    % 9 point on 10
 \def\footnotesize{\@setfontsize\footnotesize\@ixpt{10}}  % 9 point on 10
 \def\scriptsize{\@setfontsize\scriptsize\@viipt{10}}  % 7 point on 10 -- NOTE(review): this comment used to say "on 8" but the code passes 10; confirm the intended leading
 \def\tiny{\@setfontsize\tiny\@vipt{7}}    % 6 point on 7
 \def\large{\@setfontsize\large\@xipt{12}}    % 11 point on 12
 \def\Large{\@setfontsize\Large\@xiipt{14}}    % 12 point on 14
 \def\LARGE{\@setfontsize\LARGE\@xivpt{16}}    % 14 point on 16
 \def\huge{\@setfontsize\huge\@xviipt{20}}    % 17 point on 20
 \def\Huge{\@setfontsize\Huge\@xxpt{23}}    % 20 point on 23
 % Checks run at \begin{document}, once all packages have been loaded:
 %  1. if natbib is loaded, configure citation commands and the
 %     bibliography style;
 %  2. raise a \PackageError for every package on the forbidden list that
 %     is loaded.
 % NOTE(review): several error messages below misspell "forbidden"
 % ("forbbidden" / "forbbiden") and report the package name as "aaai".
 % They are runtime strings and are left unchanged here.
 \AtBeginDocument{%
  \@ifpackageloaded{natbib}%
    {%
      % When natbib is in use, set the proper style and fix a few things
      % (\cite behaves like \citep, \shortcite like \citeyearpar, the
      % author-year separator is emptied and the hanging indent removed).
      \let\cite\citep
      \let\shortcite\citeyearpar
      \setcitestyle{aysep={}}
      \setlength\bibhang{0pt}
      \bibliographystyle{aaai2026}
    }{}%
  % Forbidden packages: each check is independent, so every offending
  % package that is loaded is reported.
  \@ifpackageloaded{hyperref}%
    {%
      \PackageError{aaai}{You must not use hyperref in AAAI papers.}{You (or one of the packages you imported) are importing the hyperref package, which is forbidden in AAAI papers. You must remove it from the paper to proceed.}
    }{}%
  \@ifpackageloaded{bbm}%
    {%
      \PackageError{aaai}{You must not use bbm package in AAAI papers because it introduces Type 3 fonts which are forbidden.}{See https://tex.stackexchange.com/questions/479160/a-replacement-to-mathbbm1-with-type-1-fonts for possible alternatives.}
    }{}%
    \@ifpackageloaded{authblk}%
    {%
      \PackageError{aaai}{Package authblk is forbbidden.}{Package authblk is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{balance}%
    {%
      \PackageError{aaai}{Package balance is forbbidden.}{Package balance is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{CJK}%
    {%
      \PackageError{aaai}{Package CJK is forbbidden.}{Package CJK is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{flushend}%
    {%
      \PackageError{aaai}{Package flushend is forbbidden.}{Package flushend is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{fontenc}%
    {%
      \PackageError{aaai}{Package fontenc is forbbidden.}{Package fontenc is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{fullpage}%
    {%
      \PackageError{aaai}{Package fullpage is forbbidden.}{Package fullpage is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{geometry}%
    {%
      \PackageError{aaai}{Package geometry is forbbidden.}{Package geometry is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{grffile}%
    {%
      \PackageError{aaai}{Package grffile is forbbidden.}{Package grffile is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{navigator}%
    {%
      \PackageError{aaai}{Package navigator is forbbidden.}{Package navigator is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{savetrees}%
    {%
      \PackageError{aaai}{Package savetrees is forbbidden.}{Package savetrees is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{setspace}%
    {%
      \PackageError{aaai}{Package setspace is forbbidden.}{Package setspace is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{stfloats}%
    {%
      \PackageError{aaai}{Package stfloats is forbbidden.}{Package stfloats is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{tabu}%
    {%
      \PackageError{aaai}{Package tabu is forbbidden.}{Package tabu is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{titlesec}%
    {%
      \PackageError{aaai}{Package titlesec is forbbidden.}{Package titlesec is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{tocbibind}%
    {%
      \PackageError{aaai}{Package tocbibind is forbbidden.}{Package tocbibind is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{ulem}%
    {%
      \PackageError{aaai}{Package ulem is forbbidden.}{Package ulem is forbbiden. You must find an alternative.}
    }{}%
  \@ifpackageloaded{wrapfig}%
    {%
      \PackageError{aaai}{Package wrapfig is forbbidden.}{Package wrapfig is forbbiden. You must find an alternative.}
    }{}%
 }
 % The end of the thebibliography environment simply closes the underlying
 % list (\endlist), with no additional end-of-bibliography processing.
 \let\endthebibliography=\endlist
--- a/skills/mlops/ml-paper-writing/templates/acl/README.md
+++ b/skills/mlops/ml-paper-writing/templates/acl/README.md
@ -0,0 +1,50 @@
 # *ACL Paper Styles
 This directory contains the latest LaTeX templates for *ACL conferences.
 ## Instructions for authors
 Paper submissions to *ACL conferences must use the official ACL style
 templates.
 The LaTeX style files are available
 - as an [Overleaf template](https://www.overleaf.com/latex/templates/association-for-computational-linguistics-acl-conference/jvxskxpnznfj)
 - in this repository
 - as a [.zip file](https://github.com/acl-org/acl-style-files/archive/refs/heads/master.zip)
 Please see [`acl_latex.tex`](https://github.com/acl-org/acl-style-files/blob/master/acl_latex.tex) for an example.
 Please follow the paper formatting guidelines general to *ACL
 conferences:
 - [Paper formatting guidelines](https://acl-org.github.io/ACLPUB/formatting.html)
 Authors may not modify these style files or use templates designed for
 other conferences.
 ## Instructions for publications chairs
 To adapt the style files for your conference, please fork this repository and
 make necessary changes. Minimally, you'll need to update the name of
 the conference and rename the files.
 If you make improvements to the templates that should be propagated to
 future conferences, please submit a pull request. Thank you in
 advance!
 In older versions of the templates, authors were asked to fill in the
 START submission ID so that it would be stamped at the top of each
 page of the anonymized version. This is no longer needed, because it
 is now possible to do this stamping automatically within
 START. Currently, the way to do this is for the program chair to email
 support@softconf.com and request it.
 ## Instructions for making changes to style files
 - Merge the pull request on GitHub, or push to GitHub.
 - `git pull` from GitHub to a local repository.
 - Then `git push` from your local repository to the Overleaf project.
    - The Overleaf project is https://www.overleaf.com/project/5f64f1fb97c4c50001b60549
    - The Overleaf git URL is https://git.overleaf.com/5f64f1fb97c4c50001b60549
 - Then click "Submit" and then "Submit as Template" in Overleaf to ask Overleaf to update the Overleaf template from the Overleaf project.
--- a/skills/mlops/ml-paper-writing/templates/acl/acl.sty
+++ b/skills/mlops/ml-paper-writing/templates/acl/acl.sty
@ -0,0 +1,312 @@
 % This is the LaTeX style file for *ACL.
 % The official sources can be found at
 %
 %     https://github.com/acl-org/acl-style-files/
 %
 % This package is activated by adding
 %
 %    \usepackage{acl}
 %
 % to your LaTeX file. When submitting your paper for review, add the "review" option:
 %
 %    \usepackage[review]{acl}
 % Package option switches: each \newif creates a boolean that is set by the
 % option handlers declared below.
 \newif\ifacl@finalcopy
 \newif\ifacl@anonymize
 \newif\ifacl@linenumbers
 \newif\ifacl@pagenumbers
 % final:    not anonymized, no line numbers, no page numbers
 \DeclareOption{final}{\acl@finalcopytrue\acl@anonymizefalse\acl@linenumbersfalse\acl@pagenumbersfalse}
 % review:   anonymized, with line numbers and page numbers
 \DeclareOption{review}{\acl@finalcopyfalse\acl@anonymizetrue\acl@linenumberstrue\acl@pagenumberstrue}
 % preprint: not anonymized, no line numbers, but with page numbers
 \DeclareOption{preprint}{\acl@finalcopytrue\acl@anonymizefalse\acl@linenumbersfalse\acl@pagenumberstrue}
 \ExecuteOptions{final} % final copy is the default
 % include hyperref, unless user specifies nohyperref option like this:
 % \usepackage[nohyperref]{acl}
 \newif\ifacl@hyperref
 \DeclareOption{hyperref}{\acl@hyperreftrue}
 \DeclareOption{nohyperref}{\acl@hyperreffalse}
 \ExecuteOptions{hyperref} % default is to use hyperref
 % Apply the options given in \usepackage[...]{acl}; these override the
 % defaults executed above.
 \ProcessOptions\relax
 % Announce the style in the log/terminal output
 \typeout{Conference Style for ACL}
 % xcolor is loaded unconditionally; \color is used for the line-number font
 % below and \definecolor for link colors later in this file.
 \usepackage{xcolor}
 % Everything up to the matching \else is only set up when the linenumbers
 % switch is true (the "review" option sets it).
 \ifacl@linenumbers
  % Add draft line numbering via the lineno package
  % https://texblog.org/2012/02/08/adding-line-numbers-to-documents/
  \usepackage[switch,mathlines]{lineno}
  % Line numbers in gray Helvetica 8pt
  \font\aclhv = phvb at 8pt
  \renewcommand\linenumberfont{\aclhv\color{lightgray}}
  % Zero-fill line numbers
  % NUMBER with left flushed zeros  \fillzeros[<WIDTH>]<NUMBER>
  \newcount\cv@tmpc@ \newcount\cv@tmpc
  % How \fillzeros works:
  %  1. \cv@tmpc@ holds |NUMBER|; the first \loop repeatedly divides it by 10
  %     while counting the decimal digits in \cv@tmpc (starting from 1).
  %  2. For a negative NUMBER the count is advanced once more and "-" is printed.
  %  3. The second \loop prints "0" and advances the count until it reaches WIDTH.
  %  4. Finally |NUMBER| itself is printed with \the.
  \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
    \cv@tmpc=1 %
    \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
      \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
    \ifnum#2<0\advance\cv@tmpc1\relax-\fi
    \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
    \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
  % Print line numbers zero-padded to three digits (001, 002, ...)
  \renewcommand\thelinenumber{\fillzeros[3]{\arabic{linenumber}}}
  % Switch line numbering on at \begin{document}
  \AtBeginDocument{\linenumbers}
  % Distance between the line numbers and the text
  \setlength{\linenumbersep}{1.6cm}
  % Bug: An equation with $$ ... $$ isn't numbered, nor is the previous line.
  % Patch amsmath commands so that the previous line and the equation itself
  % are numbered. Bug: multline has an extra line number.
  % https://tex.stackexchange.com/questions/461186/how-to-use-lineno-with-amsmath-align
  \usepackage{etoolbox} %% <- for \pretocmd, \apptocmd and \patchcmd
  % \linenomathpatch{<env>}: prepend \linenomath to the begin-code of <env> and
  % <env>*, and append \endlinenomath to the matching end-code (\end<env> and
  % \end<env>*).
  \newcommand*\linenomathpatch[1]{%
    \expandafter\pretocmd\csname #1\endcsname {\linenomath}{}{}%
    \expandafter\pretocmd\csname #1*\endcsname {\linenomath}{}{}%
    \expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
    \expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
  }
  % \linenomathpatchAMS{<env>}: same as \linenomathpatch, but the begin-code is
  % prefixed with \linenomathAMS (chosen below) instead of \linenomath.
  \newcommand*\linenomathpatchAMS[1]{%
    \expandafter\pretocmd\csname #1\endcsname {\linenomathAMS}{}{}%
    \expandafter\pretocmd\csname #1*\endcsname {\linenomathAMS}{}{}%
    \expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
    \expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
  }
  %% Definition of \linenomathAMS depends on whether the mathlines option is provided
  \expandafter\ifx\linenomath\linenomathWithnumbers
    \let\linenomathAMS\linenomathWithnumbers
    %% The following line gets rid of an extra line number at the bottom:
    \patchcmd\linenomathAMS{\advance\postdisplaypenalty\linenopenalty}{}{}{}
  \else
    \let\linenomathAMS\linenomathNonumbers
  \fi
  % Apply the patches at \begin{document}. equation gets the plain
  % \linenomath patch, the other environments listed here the AMS variant.
  \AtBeginDocument{%
    \linenomathpatch{equation}%
    \linenomathpatchAMS{gather}%
    \linenomathpatchAMS{multline}%
    \linenomathpatchAMS{align}%
    \linenomathpatchAMS{alignat}%
    \linenomathpatchAMS{flalign}%
  }
 \else
  % Hack to ignore these commands, which review mode puts into the .aux file.
  % They are defined as no-ops that swallow their arguments, and
  % \nolinenumbers is defined as a no-op as well.
  \newcommand{\@LN@col}[1]{}
  \newcommand{\@LN}[2]{}
  \newcommand{\nolinenumbers}{}
 \fi
 % Page geometry: A4 paper with 2.5cm margins. The options are handed to the
 % geometry package before it is loaded.
 \PassOptionsToPackage{a4paper,margin=2.5cm,heightrounded=true}{geometry}
 \RequirePackage{geometry}
 % Gap between the two text columns
 \setlength\columnsep{0.6cm}
 % Height of the title/author box (used as "\vbox to \titlebox" in \@maketitle)
 \newlength\titlebox
 \setlength\titlebox{11\baselineskip}
 % \titlebox should be a multiple of \baselineskip so that
 % column height remaining fits an exact number of lines of text
 \flushbottom \twocolumn \sloppy
 % We're never going to need a table of contents, so just flush it to
 % save space --- suggested by drstrip@sandia-2
 % (\addcontentsline is redefined to discard its three arguments)
 \def\addcontentsline#1#2#3{}
 % Arabic page numbers when the pagenumbers switch is on; otherwise use the
 % empty page style for the current page and all following pages.
 \ifacl@pagenumbers
    \pagenumbering{arabic}
 \else
    \thispagestyle{empty}
    \pagestyle{empty}
 \fi
 %% Title and Authors %%
 \let\Thanks\thanks % \Thanks and \thanks used to be different, but keep this for backwards compatibility.
 % \outauthor typesets the author block as a top-aligned, centered,
 % single-column tabular. When the anonymize switch is on, the fixed text
 % "Anonymous ACL submission" is printed instead of the contents of \@author.
 \newcommand\outauthor{%
    \begin{tabular}[t]{c}
    \ifacl@anonymize
        \bfseries Anonymous ACL submission
    \else
        \bfseries\@author
    \fi
    \end{tabular}}
 % Mostly taken from deproc.
 % Both definitions below are installed at \begin{document}.
 %
 % \maketitle: typesets \@maketitle across both columns via \twocolumn[...],
 % emits the \thanks footnotes (\@thanks) using symbol footnote marks, resets
 % the footnote counter to 0, and then disables itself: \maketitle,
 % \@maketitle and \thanks are set to \relax and \@thanks, \@author and
 % \@title are emptied.
 %
 % \@maketitle: builds a vbox of height \titlebox and width \textwidth that
 % holds the centered title (\Large, bold) followed by the author block from
 % \outauthor. Within the author block three separators are defined locally:
 %   \and  prints the word "and" between names,
 %   \And  closes the current author tabular and opens a new one in the same row,
 %   \AND  closes the current row and opens a new row of authors.
 \AtBeginDocument{
 \def\maketitle{\par
 \begingroup
   \def\thefootnote{\fnsymbol{footnote}}
   \twocolumn[\@maketitle]
   \@thanks
 \endgroup
 \setcounter{footnote}{0}
 \let\maketitle\relax
 \let\@maketitle\relax
 \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
 \def\@maketitle{\vbox to \titlebox{\hsize\textwidth
 \linewidth\hsize \vskip 0.125in minus 0.125in \centering
 {\Large\bfseries \@title \par} \vskip 0.2in plus 1fil minus 0.1in
 {\def\and{\unskip\enspace{\rmfamily and}\enspace}%
  \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
           \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bfseries}%
  \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
          \vskip 0.25in plus 1fil minus 0.125in
           \hbox to \linewidth\bgroup\large \hfil\hfil
             \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bfseries}
  \hbox to \linewidth\bgroup\large \hfil\hfil
    \hbox to 0pt\bgroup\hss
  \outauthor
   \hss\egroup
    \hfil\hfil\egroup}
  \vskip 0.3in plus 2fil minus 0.1in
 }}
 }
 % margins and font size for abstract
 % The heading (\abstractname) is centered, \large and bold. The body is set
 % inside an unlabeled list with 0.6cm left and right margins, which makes it
 % narrower than the column.
 % NOTE(review): \@setsize\normalsize{12pt}\xpt\@xpt presumably selects 10pt
 % type on a 12pt baseline for the abstract body -- confirm against the
 % definition of \@setsize in use.
 \renewenvironment{abstract}%
  {\begin{center}\large\textbf{\abstractname}\end{center}%
    \begin{list}{}%
      {\setlength{\rightmargin}{0.6cm}%
        \setlength{\leftmargin}{0.6cm}}%
      \item[]\ignorespaces%
      \@setsize\normalsize{12pt}\xpt\@xpt
  }%
  {\unskip\end{list}}
 % Resizing figure and table captions - SL
 % Support for interacting with the caption, subfigure, and subcaption packages - SL
 \RequirePackage{caption}
 % Declare a caption font named "10pt" (10pt type, 12pt baseline) and make it
 % the font for all captions.
 \DeclareCaptionFont{10pt}{\fontsize{10pt}{12pt}\selectfont}
 \captionsetup{font=10pt}
 \RequirePackage{natbib}
 % for citation commands in the .tex, authors can use:
 % \citep, \citet, and \citeyearpar for compatibility with natbib, or
 % \cite, \newcite, and \shortcite for compatibility with older ACL .sty files
 \renewcommand\cite{\citep}  % to get "(Author Year)" with natbib
 \newcommand\shortcite{\citeyearpar}% to get "(Year)" with natbib
 \newcommand\newcite{\citet} % to get "Author (Year)" with natbib
 \newcommand{\citeposs}[1]{\citeauthor{#1}'s (\citeyear{#1})} % to get "Author's (Year)"
 % Use the ACL bibliography style (acl_natbib.bst) for every document
 \bibliographystyle{acl_natbib}
 % Bibliography
 % Don't put a label in the bibliography at all.  Just use the unlabeled format
 % instead.
 % The argument #1 (the widest-label argument of the standard environment) is
 % accepted but not used. The environment prints an unnumbered "References"
 % section heading, sets the running heads via \@mkboth, and opens a list with
 % zero label width whose entries hang by \parindent (leftmargin = \parindent,
 % itemindent = -\parindent). \newblock becomes a small stretchable space, and
 % the space factor of "." is set to 1000 so periods inside entries are not
 % followed by extra sentence spacing.
 \def\thebibliography#1{\vskip\parskip%
 \vskip\baselineskip%
 \def\baselinestretch{1}%
 \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
 \vskip-\parskip%
 \vskip-\baselineskip%
 \section*{References\@mkboth
 {References}{References}}\list
 {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
 \setlength{\itemindent}{-\parindent}}
 \def\newblock{\hskip .11em plus .33em minus -.07em}
 \sloppy\clubpenalty4000\widowpenalty4000
 \sfcode`\.=1000\relax}
 % The environment ends by closing the list
 \let\endthebibliography=\endlist
 % Allow for a bibliography of sources of attested examples
 % Same layout as \thebibliography (unused argument #1, unlabeled list with
 % entries hanging by \parindent), but the unnumbered section heading and the
 % running heads read "Sources of Attested Examples".
 \def\thesourcebibliography#1{\vskip\parskip%
 \vskip\baselineskip%
 \def\baselinestretch{1}%
 \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
 \vskip-\parskip%
 \vskip-\baselineskip%
 \section*{Sources of Attested Examples\@mkboth
 {Sources of Attested Examples}{Sources of Attested Examples}}\list
 {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
 \setlength{\itemindent}{-\parindent}}
 \def\newblock{\hskip .11em plus .33em minus -.07em}
 \sloppy\clubpenalty4000\widowpenalty4000
 \sfcode`\.=1000\relax}
 % The environment ends by closing the list
 \let\endthesourcebibliography=\endlist
 % sections with less space
 % Each command is defined through \@startsection with the arguments
 %   {name}{level}{indent}{beforeskip}{afterskip}{style}.
 % \section, \subsection and \subsubsection use a negative beforeskip and a
 % positive afterskip, with bold, ragged-right headings. \paragraph and
 % \subparagraph use a positive beforeskip and a negative afterskip (-1em),
 % with bold headings.
 % NOTE(review): in the standard LaTeX \@startsection a negative beforeskip
 % suppresses the indent of the following paragraph and a negative afterskip
 % gives a run-in heading -- confirm against the LaTeX kernel documentation.
 \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
    -0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bfseries\raggedright}}
 \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
    -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bfseries\raggedright}}
 %% changed by KO to - values to get the initial parindent right
 \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex plus
   -0.5ex minus -.2ex}{0.5ex plus .2ex}{\normalsize\bfseries\raggedright}}
 \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
   0.5ex minus .2ex}{-1em}{\normalsize\bfseries}}
 \def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
   0.5ex minus .2ex}{-1em}{\normalsize\bfseries}}
 % Footnotes
 % Footnote spacing, the skip above the footnote area, and a 5pc-wide rule
 % separating footnotes from the text; the footnote counter starts at 0.
 \footnotesep 6.65pt %
 \skip\footins 9pt plus 4pt minus 2pt
 \def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
 \setcounter{footnote}{0}
 % Lists and paragraphs
 % Paragraph indent and the default vertical spacing parameters for lists
 \parindent 1em
 \topsep 4pt plus 1pt minus 2pt
 \partopsep 1pt plus 0.5pt minus 0.5pt
 \itemsep 2pt plus 1pt minus 0.5pt
 \parsep 2pt plus 1pt minus 0.5pt
 % Left margins for list nesting levels one to six
 \leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
 \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
 \labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
 % Per-level list setup: each \@list<n> selects that level's left margin.
 % From level two on, the label width is set to the margin minus \labelsep;
 % levels two and three also tighten the vertical spacing.
 \def\@listi{\leftmargin\leftmargini}
 \def\@listii{\leftmargin\leftmarginii
   \labelwidth\leftmarginii\advance\labelwidth-\labelsep
   \topsep 2pt plus 1pt minus 0.5pt
   \parsep 1pt plus 0.5pt minus 0.5pt
   \itemsep \parsep}
 \def\@listiii{\leftmargin\leftmarginiii
    \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
    \topsep 1pt plus 0.5pt minus 0.5pt
    \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
    \itemsep \topsep}
 \def\@listiv{\leftmargin\leftmarginiv
     \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
 \def\@listv{\leftmargin\leftmarginv
     \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
 \def\@listvi{\leftmargin\leftmarginvi
     \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
 % Vertical space around displayed equations (the "short" variants apply when
 % the preceding text line is short)
 \abovedisplayskip 7pt plus2pt minus5pt%
 \belowdisplayskip \abovedisplayskip
 \abovedisplayshortskip  0pt plus3pt%
 \belowdisplayshortskip  4pt plus3pt minus3pt%
 % Less leading in most fonts (due to the narrow columns)
 % The choices were between 1-pt and 1.5-pt leading
 % Each size command is redefined through \@setsize.
 % NOTE(review): the braced argument (11pt, 10pt, ...) is presumably the
 % baseline skip and the following \xpt/\ixpt/... the type size -- confirm
 % against the definition of \@setsize in use.
 \def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
 \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
 \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
 \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
 \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
 \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
 \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
 \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
 \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
 \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
 % The hyperref manual (section 9) says hyperref should be loaded after natbib
 % With the hyperref switch on (the default), load hyperref with breakable
 % links and color all link types dark blue. With nohyperref, define a
 % fallback \href and load the url package instead.
 \ifacl@hyperref
  \PassOptionsToPackage{breaklinks}{hyperref}
  \RequirePackage{hyperref}
  % make links dark blue
  \definecolor{darkblue}{rgb}{0, 0, 0.5}
  \hypersetup{colorlinks=true, citecolor=darkblue, linkcolor=darkblue, urlcolor=darkblue}
 \else
  % This definition is used if the hyperref package is not loaded.
  % It provides a backup, no-op definition of \href: the URL argument (#1) is
  % dropped and only the link text (#2) is typeset.
  % This is necessary because \href command is used in the acl_natbib.bst file.
  \def\href#1#2{{#2}}
  \usepackage{url}
 \fi
--- a/skills/mlops/ml-paper-writing/templates/acl/acl_latex.tex
+++ b/skills/mlops/ml-paper-writing/templates/acl/acl_latex.tex
@ -0,0 +1,377 @@
 \documentclass[11pt]{article}
 % Change "review" to "final" to generate the final (sometimes called camera-ready) version.
 % Change to "preprint" to generate a non-anonymous version with page numbers.
 \usepackage[review]{acl}
 % Standard package includes
 \usepackage{times}
 \usepackage{latexsym}
 % For proper rendering and hyphenation of words containing Latin characters (including in bib files)
 \usepackage[T1]{fontenc}
 % For Vietnamese characters
 % \usepackage[T5]{fontenc}
 % See https://www.latex-project.org/help/documentation/encguide.pdf for other character sets
 % This assumes your files are encoded as UTF8
 \usepackage[utf8]{inputenc}
 % This is not strictly necessary, and may be commented out,
 % but it will improve the layout of the manuscript,
 % and will typically save some space.
 \usepackage{microtype}
 % This is also not strictly necessary, and may be commented out.
 % However, it will improve the aesthetics of text in
 % the typewriter font.
 \usepackage{inconsolata}
 %Including images in your LaTeX document requires adding
 %additional package(s)
 \usepackage{graphicx}
 % If the title and author information does not fit in the area allocated, uncomment the following
 %
 %\setlength\titlebox{<dim>}
 %
 % and set <dim> to something 5cm or larger.
 \title{Instructions for *ACL Proceedings}
 % Author information can be set in various styles:
 % For several authors from the same institution:
 % \author{Author 1 \and ... \and Author n \\
 %         Address line \\ ... \\ Address line}
 % if the names do not fit well on one line use
 %         Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
 % For authors from different institutions:
 % \author{Author 1 \\ Address line \\  ... \\ Address line
 %         \And  ... \And
 %         Author n \\ Address line \\ ... \\ Address line}
 % To start a separate ``row'' of authors use \AND, as in
 % \author{Author 1 \\ Address line \\  ... \\ Address line
 %         \AND
 %         Author 2 \\ Address line \\ ... \\ Address line \And
 %         Author 3 \\ Address line \\ ... \\ Address line}
 \author{First Author \\
  Affiliation / Address line 1 \\
  Affiliation / Address line 2 \\
  Affiliation / Address line 3 \\
  \texttt{email@domain} \\\And
  Second Author \\
  Affiliation / Address line 1 \\
  Affiliation / Address line 2 \\
  Affiliation / Address line 3 \\
  \texttt{email@domain} \\}
 %\author{
 %  \textbf{First Author\textsuperscript{1}},
 %  \textbf{Second Author\textsuperscript{1,2}},
 %  \textbf{Third T. Author\textsuperscript{1}},
 %  \textbf{Fourth Author\textsuperscript{1}},
 %\\
 %  \textbf{Fifth Author\textsuperscript{1,2}},
 %  \textbf{Sixth Author\textsuperscript{1}},
 %  \textbf{Seventh Author\textsuperscript{1}},
 %  \textbf{Eighth Author \textsuperscript{1,2,3,4}},
 %\\
 %  \textbf{Ninth Author\textsuperscript{1}},
 %  \textbf{Tenth Author\textsuperscript{1}},
 %  \textbf{Eleventh E. Author\textsuperscript{1,2,3,4,5}},
 %  \textbf{Twelfth Author\textsuperscript{1}},
 %\\
 %  \textbf{Thirteenth Author\textsuperscript{3}},
 %  \textbf{Fourteenth F. Author\textsuperscript{2,4}},
 %  \textbf{Fifteenth Author\textsuperscript{1}},
 %  \textbf{Sixteenth Author\textsuperscript{1}},
 %\\
 %  \textbf{Seventeenth S. Author\textsuperscript{4,5}},
 %  \textbf{Eighteenth Author\textsuperscript{3,4}},
 %  \textbf{Nineteenth N. Author\textsuperscript{2,5}},
 %  \textbf{Twentieth Author\textsuperscript{1}}
 %\\
 %\\
 %  \textsuperscript{1}Affiliation 1,
 %  \textsuperscript{2}Affiliation 2,
 %  \textsuperscript{3}Affiliation 3,
 %  \textsuperscript{4}Affiliation 4,
 %  \textsuperscript{5}Affiliation 5
 %\\
 %  \small{
 %    \textbf{Correspondence:} \href{mailto:email@domain}{email@domain}
 %  }
 %}
 \begin{document}
 \maketitle
 \begin{abstract}
 This document is a supplement to the general instructions for *ACL authors. It contains instructions for using the \LaTeX{} style files for ACL conferences.
 The document itself conforms to its own specifications, and is therefore an example of what your manuscript should look like.
 These instructions should be used both for papers submitted for review and for final versions of accepted papers.
 \end{abstract}
 \section{Introduction}
 These instructions are for authors submitting papers to *ACL conferences using \LaTeX. They are not self-contained. All authors must follow the general instructions for *ACL proceedings,\footnote{\url{http://acl-org.github.io/ACLPUB/formatting.html}} and this document contains additional instructions for the \LaTeX{} style files.
 The templates include the \LaTeX{} source of this document (\texttt{acl\_latex.tex}),
 the \LaTeX{} style file used to format it (\texttt{acl.sty}),
 an ACL bibliography style (\texttt{acl\_natbib.bst}),
 an example bibliography (\texttt{custom.bib}),
 and the bibliography for the ACL Anthology (\texttt{anthology.bib}).
 \section{Engines}
 To produce a PDF file, pdf\LaTeX{} is strongly recommended (over original \LaTeX{} plus dvips+ps2pdf or dvipdf).
 The style file \texttt{acl.sty} can also be used with
 lua\LaTeX{} and
 Xe\LaTeX{}, which are especially suitable for text in non-Latin scripts.
 The file \texttt{acl\_lualatex.tex} in this repository provides
 an example of how to use \texttt{acl.sty} with either
 lua\LaTeX{} or
 Xe\LaTeX{}.
 \section{Preamble}
 The first line of the file must be
 \begin{quote}
 \begin{verbatim}
 \documentclass[11pt]{article}
 \end{verbatim}
 \end{quote}
 To load the style file in the review version:
 \begin{quote}
 \begin{verbatim}
 \usepackage[review]{acl}
 \end{verbatim}
 \end{quote}
 For the final version, omit the \verb|review| option:
 \begin{quote}
 \begin{verbatim}
 \usepackage{acl}
 \end{verbatim}
 \end{quote}
 To use Times Roman, put the following in the preamble:
 \begin{quote}
 \begin{verbatim}
 \usepackage{times}
 \end{verbatim}
 \end{quote}
 (Alternatives like txfonts or newtx are also acceptable.)
 Please see the \LaTeX{} source of this document for comments on other packages that may be useful.
 Set the title and author using \verb|\title| and \verb|\author|. Within the author list, format multiple authors using \verb|\and| and \verb|\And| and \verb|\AND|; please see the \LaTeX{} source for examples.
 By default, the box containing the title and author names is set to the minimum of 5 cm. If you need more space, include the following in the preamble:
 \begin{quote}
 \begin{verbatim}
 \setlength\titlebox{<dim>}
 \end{verbatim}
 \end{quote}
 where \verb|<dim>| is replaced with a length. Do not set this length smaller than 5 cm.
 \section{Document Body}
 \subsection{Footnotes}
 Footnotes are inserted with the \verb|\footnote| command.\footnote{This is a footnote.}
 \subsection{Tables and figures}
 See Table~\ref{tab:accents} for an example of a table and its caption.
 \textbf{Do not override the default caption sizes.}
 \begin{table}
  \centering
  \begin{tabular}{lc}
    \hline
    \textbf{Command} & \textbf{Output} \\
    \hline
    \verb|{\"a}|     & {\"a}           \\
    \verb|{\^e}|     & {\^e}           \\
    \verb|{\`i}|     & {\`i}           \\
    \verb|{\.I}|     & {\.I}           \\
    \verb|{\o}|      & {\o}            \\
    \verb|{\'u}|     & {\'u}           \\
    \verb|{\aa}|     & {\aa}           \\\hline
  \end{tabular}
  \begin{tabular}{lc}
    \hline
    \textbf{Command} & \textbf{Output} \\
    \hline
    \verb|{\c c}|    & {\c c}          \\
    \verb|{\u g}|    & {\u g}          \\
    \verb|{\l}|      & {\l}            \\
    \verb|{\~n}|     & {\~n}           \\
    \verb|{\H o}|    & {\H o}          \\
    \verb|{\v r}|    & {\v r}          \\
    \verb|{\ss}|     & {\ss}           \\
    \hline
  \end{tabular}
  \caption{Example commands for accented characters, to be used in, \emph{e.g.}, Bib\TeX{} entries.}
  \label{tab:accents}
 \end{table}
 As much as possible, fonts in figures should conform
 to the document fonts. See Figure~\ref{fig:experiments} for an example of a figure and its caption.
 Using the \verb|graphicx| package, graphics files can be included within a figure
 environment at an appropriate point within the text.
 The \verb|graphicx| package supports various optional arguments to control the
 appearance of the figure.
 You must include it explicitly in the \LaTeX{} preamble (after the
 \verb|\documentclass| declaration and before \verb|\begin{document}|) using
 \verb|\usepackage{graphicx}|.
 \begin{figure}[t]
  \includegraphics[width=\columnwidth]{example-image-golden}
  \caption{A figure with a caption that runs for more than one line.
    Example image is usually available through the \texttt{mwe} package
    without even mentioning it in the preamble.}
  \label{fig:experiments}
 \end{figure}
 \begin{figure*}[t]
  \includegraphics[width=0.48\linewidth]{example-image-a} \hfill
  \includegraphics[width=0.48\linewidth]{example-image-b}
  \caption {A minimal working example to demonstrate how to place
    two images side-by-side.}
 \end{figure*}
 \subsection{Hyperlinks}
 Users of older versions of \LaTeX{} may encounter the following error during compilation:
 \begin{quote}
 \verb|\pdfendlink| ended up in different nesting level than \verb|\pdfstartlink|.
 \end{quote}
 This happens when pdf\LaTeX{} is used and a citation splits across a page boundary. The best way to fix this is to upgrade \LaTeX{} to 2018-12-01 or later.
 \subsection{Citations}
 \begin{table*}
  \centering
  \begin{tabular}{lll}
    \hline
    \textbf{Output}           & \textbf{natbib command} & \textbf{ACL only command} \\
    \hline
    \citep{Gusfield:97}       & \verb|\citep|           &                           \\
    \citealp{Gusfield:97}     & \verb|\citealp|         &                           \\
    \citet{Gusfield:97}       & \verb|\citet|           &                           \\
    \citeyearpar{Gusfield:97} & \verb|\citeyearpar|     &                           \\
    \citeposs{Gusfield:97}    &                         & \verb|\citeposs|          \\
    \hline
  \end{tabular}
  \caption{\label{citation-guide}
    Citation commands supported by the style file.
    The style is based on the natbib package and supports all natbib citation commands.
    It also supports commands defined in previous ACL style files for compatibility.
  }
 \end{table*}
 Table~\ref{citation-guide} shows the syntax supported by the style files.
 We encourage you to use the natbib styles.
 You can use the command \verb|\citet| (cite in text) to get ``author (year)'' citations, like this citation to a paper by \citet{Gusfield:97}.
 You can use the command \verb|\citep| (cite in parentheses) to get ``(author, year)'' citations \citep{Gusfield:97}.
 You can use the command \verb|\citealp| (alternative cite without parentheses) to get ``author, year'' citations, which is useful for using citations within parentheses (e.g. \citealp{Gusfield:97}).
 A possessive citation can be made with the command \verb|\citeposs|.
 This is not a standard natbib command, so it is generally not compatible
 with other style files.
 \subsection{References}
 \nocite{Ando2005,andrew2007scalable,rasooli-tetrault-2015}
 The \LaTeX{} and Bib\TeX{} style files provided roughly follow the American Psychological Association format.
 If your own bib file is named \texttt{custom.bib}, then placing the following before any appendices in your \LaTeX{} file will generate the references section for you:
 \begin{quote}
 \begin{verbatim}
 \bibliography{custom}
 \end{verbatim}
 \end{quote}
 You can obtain the complete ACL Anthology as a Bib\TeX{} file from \url{https://aclweb.org/anthology/anthology.bib.gz}.
 To include both the Anthology and your own .bib file, use the following instead of the above.
 \begin{quote}
 \begin{verbatim}
 \bibliography{anthology,custom}
 \end{verbatim}
 \end{quote}
 Please see Section~\ref{sec:bibtex} for information on preparing Bib\TeX{} files.
 \subsection{Equations}
 An example equation is shown below:
 \begin{equation}
  \label{eq:example}
  A = \pi r^2
 \end{equation}
 Labels for equation numbers, sections, subsections, figures and tables
 are all defined with the \verb|\label{label}| command and cross references
 to them are made with the \verb|\ref{label}| command.
 This is an example cross-reference to Equation~\ref{eq:example}.
 \subsection{Appendices}
 Use \verb|\appendix| before any appendix section to switch the section numbering over to letters. See Appendix~\ref{sec:appendix} for an example.
 \section{Bib\TeX{} Files}
 \label{sec:bibtex}
 Unicode cannot be used in Bib\TeX{} entries, and some ways of typing special characters can disrupt Bib\TeX's alphabetization. The recommended way of typing special characters is shown in Table~\ref{tab:accents}.
 Please ensure that Bib\TeX{} records contain DOIs or URLs when possible, and for all the ACL materials that you reference.
 Use the \verb|doi| field for DOIs and the \verb|url| field for URLs.
 If a Bib\TeX{} entry has a URL or DOI field, the paper title in the references section will appear as a hyperlink to the paper, using the hyperref \LaTeX{} package.
 \section*{Limitations}
 This document does not cover the content requirements for ACL or any
 other specific venue.  Check the author instructions for
 information on
 maximum page lengths, the required ``Limitations'' section,
 and so on.
 \section*{Acknowledgments}
 This document has been adapted
 by Steven Bethard, Ryan Cotterell and Rui Yan
 from the instructions for earlier ACL and NAACL proceedings, including those for
 ACL 2019 by Douwe Kiela and Ivan Vuli\'{c},
 NAACL 2019 by Stephanie Lukin and Alla Roskovskaya,
 ACL 2018 by Shay Cohen, Kevin Gimpel, and Wei Lu,
 NAACL 2018 by Margaret Mitchell and Stephanie Lukin,
 Bib\TeX{} suggestions for (NA)ACL 2017/2018 from Jason Eisner,
 ACL 2017 by Dan Gildea and Min-Yen Kan,
 NAACL 2017 by Margaret Mitchell,
 ACL 2012 by Maggie Li and Michael White,
 ACL 2010 by Jing-Shin Chang and Philipp Koehn,
 ACL 2008 by Johanna D. Moore, Simone Teufel, James Allan, and Sadaoki Furui,
 ACL 2005 by Hwee Tou Ng and Kemal Oflazer,
 ACL 2002 by Eugene Charniak and Dekang Lin,
 and earlier ACL and EACL formats written by several people, including
 John Chen, Henry S. Thompson and Donald Walker.
 Additional elements were taken from the formatting instructions of the \emph{International Joint Conference on Artificial Intelligence} and the \emph{Conference on Computer Vision and Pattern Recognition}.
 % Bibliography entries for the entire Anthology, followed by custom entries
 %\bibliography{custom,anthology-overleaf-1,anthology-overleaf-2}
 % Custom bibliography entries only
 \bibliography{custom}
 \appendix
 \section{Example Appendix}
 \label{sec:appendix}
 This is an appendix.
 \end{document}
--- a/skills/mlops/ml-paper-writing/templates/acl/acl_lualatex.tex
+++ b/skills/mlops/ml-paper-writing/templates/acl/acl_lualatex.tex
@ -0,0 +1,101 @@
 % This file compiles with both LuaLaTeX and XeLaTeX
 \documentclass[11pt]{article}
 % Change "review" to "final" to generate the final (sometimes called camera-ready) version.
 % Change to "preprint" to generate a non-anonymous version with page numbers.
 \usepackage[review]{acl}
 % This is not strictly necessary, and may be commented out,
 % but it will improve the layout of the manuscript,
 % and will typically save some space.
 \usepackage{microtype}
 % If the title and author information does not fit in the area allocated, uncomment the following
 %
 %\setlength\titlebox{<dim>}
 %
 % and set <dim> to something 5cm or larger.
 % These font selection commands work with
 % LuaLaTeX and XeLaTeX, but not pdfLaTeX.
 \usepackage[english,bidi=default]{babel} % English as the main language.
 \babelfont{rm}{TeXGyreTermesX} % similar to Times
 %%% include whatever languages you need below this line
 \babelprovide[import]{hindi}
 \babelfont[*devanagari]{rm}{Lohit Devanagari}
 \babelprovide[import]{arabic}
 \babelfont[*arabic]{rm}{Noto Sans Arabic}
 %\usepackage{polyglossia}
 %\setdefaultlanguage{english}
 %\setotherlanguages{arabic,russian,thai,hindi,kannada}
 %%%%%
 \title{LuaLaTeX and XeLaTeX Template for *ACL Style Files}
 % Author information can be set in various styles:
 % For several authors from the same institution:
 % \author{Author 1 \and ... \and Author n \\
 %         Address line \\ ... \\ Address line}
 % if the names do not fit well on one line use
 %         Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
 % For authors from different institutions:
 % \author{Author 1 \\ Address line \\  ... \\ Address line
 %         \And  ... \And
 %         Author n \\ Address line \\ ... \\ Address line}
 % To start a separate ``row'' of authors use \AND, as in
 % \author{Author 1 \\ Address line \\  ... \\ Address line
 %         \AND
 %         Author 2 \\ Address line \\ ... \\ Address line \And
 %         Author 3 \\ Address line \\ ... \\ Address line}
 \author{First Author \\
  Affiliation / Address line 1 \\
  Affiliation / Address line 2 \\
  Affiliation / Address line 3 \\
  \texttt{email@domain} \\\And
  Second Author \\
  Affiliation / Address line 1 \\
  Affiliation / Address line 2 \\
  Affiliation / Address line 3 \\
  \texttt{email@domain} \\}
 \begin{document}
 \maketitle
 \begin{abstract}
 This document provides an example showing how
 to use the *ACL style files with either
 LuaLaTeX or XeLaTeX.
 \end{abstract}
 \section{Introduction}
 Please see the general instructions
 in the file \verb|acl_latex.tex|.
 Here are some examples of text in various languages.
 Hindi: \foreignlanguage{hindi}{मानव अधिकारों की सार्वभौम घोषणा}
 Arabic: \foreignlanguage{arabic}{الإعلان العالمي لحقوق الإنسان}
 Here is an example citation:
 \citet{Gusfield:97} argues that...
 % Custom bibliography entries only
 \bibliography{custom}
 \appendix
 \section{Example Appendix}
 \label{sec:appendix}
 This is an appendix.
 \end{document}
--- a/skills/mlops/ml-paper-writing/templates/acl/acl_natbib.bst
+++ b/skills/mlops/ml-paper-writing/templates/acl/acl_natbib.bst
--- a/skills/mlops/ml-paper-writing/templates/acl/anthology.bib.txt
+++ b/skills/mlops/ml-paper-writing/templates/acl/anthology.bib.txt
@ -0,0 +1,26 @@
 For citing papers in the ACL Anthology, we provide a single consolidated
 BibTeX file containing all of its papers. The bibkeys in these papers are
 designed to be semantic in nature: {names}-{year}-{words}, where
 - `names` is the concatenated last names of the authors when there is just
  one or two authors, or `lastname-etal` for 3+
 - `year` is the four-digit year
 - `words` is the first significant word in the title, or more, if necessary,
  to preserve uniqueness
 For example, https://aclanthology.org/N04-1035 can be cited as \cite{galley-etal-2004-whats}.
 The consolidated file can be downloaded from here:
 - https://aclanthology.org/anthology.bib
 Unfortunately, as of 2024 or so, this file is now larger than 50 MB, which is Overleaf's
 bib file size limit. Consequently, the Anthology shards the file automatically into
 49 MB shards.
 There are currently (2025) two files:
 - https://aclanthology.org/anthology-1.bib
 - https://aclanthology.org/anthology-2.bib
 You can import these directly into Overleaf via New File -> From External URL,
 and then add them to the \bibliography line in acl_latex.tex:
    \bibliography{custom,anthology-1,anthology-2}
--- a/skills/mlops/ml-paper-writing/templates/acl/custom.bib
+++ b/skills/mlops/ml-paper-writing/templates/acl/custom.bib
@ -0,0 +1,70 @@
 % Use this file for citations not found in the ACL Anthology (contained in "anthology.bib").
@book{Aho:72,
    author  = {Alfred V. Aho and Jeffrey D. Ullman},
    title   = {The Theory of Parsing, Translation and Compiling},
    year    = "1972",
    volume  = "1",
    publisher = {Prentice-Hall},
    address = {Englewood Cliffs, NJ}
 }
@book{APA:83,
    author  = {{American Psychological Association}},
    title   = {Publications Manual},
    year    = "1983",
    publisher = {American Psychological Association},
    address = {Washington, DC}
 }
@article{Chandra:81,
 	author = {Ashok K. Chandra and Dexter C. Kozen and Larry J. Stockmeyer},
 	year = "1981",
 	title = {Alternation},
 	journal = {Journal of the Association for Computing Machinery},
 	volume = "28",
 	number = "1",
 	pages = "114--133",
 	doi = "10.1145/322234.322243",
 }
@inproceedings{andrew2007scalable,
  title={Scalable training of {L1}-regularized log-linear models},
  author={Andrew, Galen and Gao, Jianfeng},
  booktitle={Proceedings of the 24th International Conference on Machine Learning},
  pages={33--40},
  year={2007},
 }
@book{Gusfield:97,
    author  = {Dan Gusfield},
    title   = {Algorithms on Strings, Trees and Sequences},
    year    = "1997",
    publisher = {Cambridge University Press},
    address = {Cambridge, UK}
 }
@article{rasooli-tetrault-2015,
    author    = {Mohammad Sadegh Rasooli and Joel R. Tetreault},
    title     = {Yara Parser: {A} Fast and Accurate Dependency Parser},
    journal   = {Computing Research Repository},
    volume    = {arXiv:1503.06733},
    year      = {2015},
    url       = {http://arxiv.org/abs/1503.06733},
    note    = {version 2}
 }
@article{Ando2005,
 	Acmid = {1194905},
 	Author = {Ando, Rie Kubota and Zhang, Tong},
 	Issn = {1532-4435},
 	Issue_Date = {12/1/2005},
 	Journal = {Journal of Machine Learning Research},
 	Month = dec,
 	Numpages = {37},
 	Pages = {1817--1853},
 	Publisher = {JMLR.org},
 	Title = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
 	Volume = {6},
 	Year = {2005}
 }
--- a/skills/mlops/ml-paper-writing/templates/acl/formatting.md
+++ b/skills/mlops/ml-paper-writing/templates/acl/formatting.md
@ -0,0 +1,326 @@
 # Instructions for *ACL Proceedings
 The following instructions are for authors of papers submitted for review to ACL conferences (hereafter, "review version") or papers accepted for publication in their proceedings (hereafter, "final version").
 All authors are required to adhere to these specifications.
 ## Style Files
 *ACL provides style files for LaTeX and Microsoft Word that meet these requirements. They can be found at:
 > https://acl-org.github.io/ACLPUB/
 We strongly recommend the use of these style files, which have been appropriately tailored for the *ACL proceedings.
 ## Paper Length
 The conference accepts submissions of long papers and short papers.
 Review versions of long papers may have up to eight (8) pages of content plus unlimited pages for references.
 Upon acceptance, final versions of long papers will be given one additional page -- up to nine (9) pages of content plus unlimited pages for acknowledgements and references -- so that reviewers' comments can be taken into account.
 Review versions of short papers may have up to four (4) pages of content, plus unlimited pages for references.
 Final versions of short papers may have up to five (5) pages, plus unlimited pages for acknowledgements and references.
 For both long and short papers, all figures and tables that are part of the main text must fit within these page limits.
 The conference encourages submission of appendices and supplementary material, which are not required to fit within these page limits. However, review versions of papers must be self-contained: it is optional for reviewers to look at appendices or supplementary material. Please see [Appendices](#appendices) and [Supplementary Material](#supplementary-material) for more information.
 Review versions should not refer, for further detail, to documents, code or data resources that are not available to the reviewers.
 Papers that do not conform to these requirements may be rejected without review.
 Workshop chairs may have different rules for allowed length and whether appendices or supplementary materials are welcome.
 As always, the respective call for papers is the authoritative source.
 ## Anonymity
 As reviewing will be double-blind, review versions must not include any identifying information about the authors (such as names, affiliations, or URLs).
 Self-references that reveal the author's identity, e.g.,
 > We previously showed (Gusfield, 1997)...
 must be avoided, and anonymous citations, e.g.,
 > We previously showed (Anonymous, 1997)...
 should also be avoided. Instead, use citations such as
 > Gusfield (1997) previously showed...
 Review versions must not include acknowledgements.
 **Papers that do not conform to these requirements may be rejected without review.**
 Any preliminary non-archival versions of submitted papers should be listed in the submission form but not in the review version of the paper.
 Reviewers are generally aware that authors may present preliminary versions of their work in other venues, but will not be provided the list of previous presentations from the submission form.
 Once a paper has been accepted to the conference, the final version should include the authors' names and affiliations, and is allowed to use self-references.
 ## Multiple Submission
 Papers that have been or will be submitted to other meetings or publications must indicate this at submission time in the START submission form, and must be withdrawn from the other venues if accepted by *ACL.
 Authors of papers accepted for presentation at *ACL must notify the program chairs by the deadline for final versions ("camera-ready deadline") whether the paper will be presented.
 We will not accept for publication or presentation any papers that overlap significantly in content or results with papers that will be (or have been) published elsewhere.
 Authors submitting more than one paper to *ACL must ensure that submissions do not overlap significantly (>25%) with each other in content or results.
 ## Formatting Instructions
 ### File Format
 Papers must be in Adobe Portable Document Format (PDF).
 Please make sure that your PDF file embeds all necessary fonts (especially for tree diagrams, symbols, and Asian languages).
 When you print or create the PDF file, there is usually an option in your printer setup to include none, all or just non-standard fonts.
 Please make sure that you select the option of including *all* the fonts.
 **Before sending it, test your PDF by printing it from a computer different from the one where it was created.**
 Some word processors may generate very large PDF files, where each page is rendered as an image.
 Such images may reproduce poorly.
 In this case, try alternative ways to obtain the PDF.
 All papers must use **A4 paper format** (21 cm x 29.7 cm).
 Papers must not be submitted with any other paper size.
 If you cannot meet the above requirements, please contact the publication chairs as soon as possible.
 ### Layout
 All text except for page numbers must fit within the margins.
 Review versions should have page numbers, centered in the bottom margin, but **pages should not be numbered in the final version.**
 Manuscripts must be set in two columns.
 Exceptions to the two-column format include the title, authors' names and complete addresses, which must be centered at the top of the first page, and any full-width figures or tables.
 The exact dimensions for a page on A4 paper are:
 * Left margin: 2.5 cm
 * Right margin: 2.5 cm
 * Top margin: 2.5 cm
 * Bottom margin: 2.5 cm
 * Column width: 7.7 cm
 * Column height: 24.7 cm
 * Gap between columns: 0.6 cm
 In the review version, a ruler (line numbers in the left and right margins of the article) should be printed, so that reviewers may comment on particular lines in the paper.
 The ruler should not change the appearance of any other content on the page.
 The final version should not contain a ruler.
 ### Fonts
 All text (except non-Latin scripts and mathematical formulas) should be set in **Times Roman**.
 If Times Roman is unavailable, you may use **Times New Roman** or **Computer Modern Roman.**
 The following table specifies what font sizes and styles must be used for each type of text in the manuscript.
 | Type of Text          | Font Size | Style |
 | --------------------- | --------- | ----- |
 | paper title           | 15 pt     | bold  |
 | author names          | 12 pt     | bold  |
 | author affiliation    | 12 pt     |       |
 | the word "Abstract"   | 12 pt     | bold  |
 | section titles        | 12 pt     | bold  |
 | subsection titles     | 11 pt     | bold  |
 | document text         | 11 pt     |       |
 | captions              | 10 pt     |       |
 | abstract text         | 10 pt     |       |
 | bibliography          | 10 pt     |       |
 | footnotes             | 9 pt      |       |
 ### Title and Authors
 Center the title, author's name(s) and affiliation(s) across both columns.
 Place the title centered at the top of the first page, in 15-point bold.
 Long titles should be typed on two lines without a blank line intervening.
 Put the title 2.5 cm from the top of the page.
 Write the title in [title case](https://apastyle.apa.org/style-grammar-guidelines/capitalization/title-case); do not write the title in all capital letters, except for acronyms (e.g., "BLEU") or proper nouns ("English") that are normally uppercased or capitalized.
 Place the author name(s) and affiliation(s) under the title.
 Write authors' full names; do not abbreviate given names to initials, unless they are normally written as initials ("Margaret Mitchell", not "M. Mitchell").
 Do not format surnames in all capitals ("Mitchell", not "MITCHELL").
 Do not use footnotes for affiliations.
 The affiliation should contain the author's complete address, and if possible, an electronic mail address.
 The title, author names and addresses should be completely identical to those entered to the paper submission website in order to maintain the consistency of author information among all publications of the conference.
 If they are different, the publication chairs may resolve the difference without consulting with you; so it is in your own interest to double-check that the information is consistent.
 Start the body of the first page 7.5 cm from the top of the page.
 **Even in the review version of the paper, you should maintain space for names and addresses so that they will fit in the final version.**
 ### Abstract
 Type the abstract at the beginning of the first column.
 Center the word **Abstract** in 12 point bold above the body of the abstract.
 The width of the abstract should be smaller than the
 normal column width by 0.6 cm on each side.
 The abstract text should be 10 point roman, single-spaced.
 The abstract should be a concise summary of the general thesis and conclusions of the paper.
 It should be no longer than 200 words.
 ### Text
 Begin typing the main body of the text immediately after the abstract, continuing in two columns.
 The text should be 11 point roman, single-spaced.
 Indent 0.4 cm when starting a new paragraph, except for the first paragraph in a section.
 ### Sections
 Use numbered sections (Arabic numerals) to facilitate cross references.
 Number subsections with the section number and the subsection number separated by a dot, in Arabic numerals, e.g.,
 > 1 Introduction
 or
 > 6.1 File Format
 ### Footnotes
 Put footnotes at the bottom of the page and use 9 point font.
 They may be numbered or referred to by asterisks or other symbols.
 Footnotes should be separated from the text by a line.
 ### Figures and tables
 Place figures and tables in the paper near where they are first discussed, rather than at the end, if possible.
 Wide figures/tables may run across both columns.
 To accommodate people who are color-blind (as well as those printing with black-and-white printers), grayscale readability is strongly encouraged.
 Color is not forbidden, but authors should ensure that tables and figures do not rely solely on color to convey critical distinctions.
 **Captions:**
 Provide a caption for every figure/table; number each one sequentially in the form:
 > Figure 1: Caption of the Figure.
 and
 > Table 1: Caption of the Table.
 Captions should be placed below figures/tables, in 10 point roman type.
 Captions that are one line are centered.
 Captions longer than one line are left-aligned.
 ### Hyperlinks
 Within-document and external hyperlinks should be dark blue (hex #000099), not underlined or boxed.
 ### Non-English Text
 Text in languages other than English should be accompanied by translations into English, and text in scripts other than Latin should *also* be accompanied by transliterations into Latin script, since not all readers can recognize non-Latin characters easily.
 For example, παράδειγμα *paradeigma* ‘example’ is a Greek word, and this is a Greek sentence:
 > Αυτό είναι ένα παράδειγμα.  
 > auto einai ena paradeigma.  
 > ‘This is an example.’
 ### Citations
 Citations within the text appear in parentheses (Gusfield, 1997), or, if the author's name appears in the text itself: Gusfield (1997).
 Append lowercase letters to the year in cases of ambiguities.
 Cite papers with two authors using both authors' names (Aho and Ullman, 1972), but cite papers with more than two authors by the first author's name and "et al." (Chandra et al., 1981).
 Collapse multiple citations into a single pair of parentheses (Gusfield, 1997; Aho and Ullman, 1972).
 Refrain from using full citations as sentence constituents.
 Instead of
 > (Gusfield, 1997) showed that ...  
 > In (Gusfield, 1997), ...
 write
 > Gusfield (1997) showed that ...  
 > In Gusfield (1997), ...
 Submissions should accurately reference prior and related work, including code and data.
 If a piece of prior work appeared in multiple venues, the version that appeared in a refereed, archival venue should be referenced.
 If multiple versions of a piece of prior work exist, the one used by the authors should be referenced.
 ### Acknowledgments
 The acknowledgments should go immediately before the references.
 Do not number the acknowledgments section.
 Do not include this section in the review version.
 ### References
 Gather the full set of references together under the unnumbered section heading **References**.
 Place the References section before any Appendices.
 Arrange the references alphabetically by first author, rather than by order of occurrence in the text.
 Provide as complete a citation as possible, using a consistent format, such as the [one for Computational Linguistics](http://cljournal.org/style_guide_refs.html) or the one in the [Publication Manual of the American Psychological Association](https://apastyle.apa.org/products/publication-manual-7th-edition).
 Use full names for authors, not just initials.
 Authors should not rely on automated citation indices to provide accurate references for prior and related work.
 As part of our work to make ACL materials more widely used and cited outside of our discipline, ACL has registered as a CrossRef member, as a registrant of Digital Object Identifiers (DOIs), the standard for registering permanent URNs for referencing scholarly materials.
 All references are required to contain DOIs of all cited works when possible, or, as a second resort, links to ACL Anthology pages.
 Appropriate records should be found for most materials in the current [ACL Anthology](https://aclweb.org/anthology/).
 Example article in a journal:
 > Rie Kubota Ando and Tong Zhang. 2005. [A framework for learning predictive structures from multiple tasks and unlabeled data](https://www.jmlr.org/papers/v6/ando05a.html). *Journal of Machine Learning Research*, 6:1817–1853.
 Example paper in non-ACL proceedings, with DOI:
 > Galen Andrew and Jianfeng Gao. 2007. [Scalable training of L1-regularized log-linear models](https://doi.org/10.1145/1273496.1273501). In *Proceedings of the 24th International Conference on Machine Learning*, pages 33–40.
 Example ACL Anthology paper with DOI:
 > James Goodman, Andreas Vlachos, and Jason Naradowsky. 2016. [Noise reduction and targeted exploration in imitation learning for Abstract Meaning Representation parsing](http://dx.doi.org/10.18653/v1/P16-1001). In *Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)*, pages 1–11, Berlin, Germany. Association for Computational Linguistics.
 Example ACL Anthology paper without DOI:
 > Benjamin Börschinger and Mark Johnson. 2011. [A particle filter algorithm for Bayesian word segmentation](https://www.aclweb.org/anthology/U11-1004/). In *Proceedings of the Australasian Language Technology Association Workshop 2011*, pages 10–18, Canberra, Australia.
 Example arXiv paper:
 > Mohammad Sadegh Rasooli and Joel R. Tetreault. 2015. [Yara parser: A fast and accurate dependency parser](http://arxiv.org/abs/1503.06733). *Computing Research Repository*, arXiv:1503.06733. Version 2.
 ## Appendices
 Appendices are material that can be read, and include lemmas, formulas, proofs, and tables that are not critical to the reading and understanding of the paper.
 Letter them in sequence and provide an informative title:
 > Appendix A. Title of Appendix
 The appendices come after the references.
 Review versions of appendices must follow the same anonymity guidelines as the main paper.
 ## Supplementary Material
 Submissions may include non-readable supplementary material used in the work and described in the paper.
 Any accompanying software and/or data should include licenses and documentation of research review as appropriate.
 Supplementary material may report preprocessing decisions, model parameters, and other details necessary for the replication of the experiments reported in the paper.
 Seemingly small preprocessing decisions can sometimes make a large difference in performance, so it is crucial to record such decisions to precisely characterize state-of-the-art methods.
 Nonetheless, supplementary material should be supplementary (rather than central) to the paper.
 **Submissions that misuse the supplementary material may be rejected without review.**
 Supplementary material may include explanations or details of proofs or derivations that do not fit into the paper, lists of features or feature templates, sample inputs and outputs for a system, pseudo-code or source code, and data.
 (Source code and data should be separate uploads, rather than part of the paper).
 The paper should not rely on the supplementary material: while the paper may refer to and cite the supplementary material and the supplementary material will be available to the reviewers, they will not be asked to review the supplementary material.
 Review versions of supplementary material must follow the same anonymity guidelines as the main paper.
 ## Credits
 This document has been adapted from the instructions for earlier ACL and NAACL proceedings, including those for
 ACL 2020 by Steven Bethard, Ryan Cotterell and Rui Yan,
 ACL 2019 by Douwe Kiela and Ivan Vulić,
 NAACL 2019 by Stephanie Lukin and Alla Roskovskaya,
 ACL 2018 by Shay Cohen, Kevin Gimpel, and Wei Lu,
 NAACL 2018 by Margaret Mitchell and Stephanie Lukin,
 BibTeX suggestions for (NA)ACL 2017/2018 from Jason Eisner,
 ACL 2017 by Dan Gildea and Min-Yen Kan,
 NAACL 2017 by Margaret Mitchell,
 ACL 2012 by Maggie Li and Michael White,
 ACL 2010 by Jing-Shin Chang and Philipp Koehn,
 ACL 2008 by Johanna D. Moore, Simone Teufel, James Allan, and Sadaoki Furui,
 ACL 2005 by Hwee Tou Ng and Kemal Oflazer,
 ACL 2002 by Eugene Charniak and Dekang Lin,
 and earlier ACL and EACL formats written by several people, including
 John Chen, Henry S. Thompson and Donald Walker.
 Additional elements were taken from the formatting instructions of the *International Joint Conference on Artificial Intelligence* and the *Conference on Computer Vision and Pattern Recognition*.
--- a/skills/mlops/ml-paper-writing/templates/colm2025/README.md
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/README.md
@ -0,0 +1,3 @@
 # Template
 Template and style files for CoLM 2025
--- a/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.bib
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.bib
@ -0,0 +1,11 @@
@inproceedings{Vaswani+2017,
 author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
 booktitle = {Advances in Neural Information Processing Systems},
 pages = {},
 publisher = {Curran Associates, Inc.},
 title = {Attention is All you Need},
 url = {https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
 volume = {30},
 year = {2017}
 }
--- a/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.bst
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.bst
--- a/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.pdf
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.pdf
--- a/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.sty
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.sty
@ -0,0 +1,218 @@
 %%%% COLM Macros (LaTeX)
 %%%% Adapted by Yoav Artzi and Sasha Rush from Hugo Larochelle's adaptation for ICLR, which was itself adapted from the NIPS style file macros
 %%%% Style File
 %%%% Dec 12, 1990   Rev Aug 14, 1991; Sept, 1995; April, 1997; April, 1999; October 2014
 % This file can be used with LaTeX2e whether running in main mode, or
 % 2.09 compatibility mode.
 %
 % If using main mode, you need to include the commands
 %             \documentclass{article}
 %             \usepackage[<mode>]{colm2025_conference}
 %
 % where <mode> is one of the options declared below:
 %   submission  (default) anonymized author block, "Under review" header
 %   preprint    real author block, "Preprint. Under review." header
 %   final       real author block, "Published as a conference paper" header
 %
 % Define options: one boolean switch per mode.
 \newif\ifcolmsubmission
 \newif\ifcolmpreprint
 \newif\ifcolmfinal
 % Set submission as default
 \colmsubmissiontrue
 \colmpreprintfalse
 \colmfinalfalse
 % Define option handling.  Every option sets all three switches, so exactly
 % one of them is true once the options have been processed.
 \DeclareOption{submission}{\colmsubmissiontrue\colmpreprintfalse\colmfinalfalse}
 \DeclareOption{preprint}{\colmsubmissionfalse\colmpreprinttrue\colmfinalfalse}
 \DeclareOption{final}{\colmsubmissionfalse\colmpreprintfalse\colmfinaltrue}
 % Apply whichever option was passed in \usepackage[...]{...}.
 \ProcessOptions\relax
 % Fonts: Palatino for text and math, Inconsolata for monospace.
 \RequirePackage{tgpagella} % text only
 \RequirePackage{mathpazo}  % math & text
 \RequirePackage{inconsolata} % for tt font
 % NOTE: if the page-dimension parameters set in this file are changed,
 %       they will require corresponding changes in the maketitle section.
 %
 % Supporting packages.  Inside a package file dependencies are loaded with
 % \RequirePackage rather than \usepackage, like the other loads in this file.
 \RequirePackage{eso-pic} % used by \AddToShipoutPicture
 \RequirePackage{fancyhdr}
 \RequirePackage{natbib}
 % modification to natbib citations: author-year style in round parentheses,
 % ";" between citations, "," between author and year, ";" between years
 \setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
 % Float placement: top floats may fill up to 95% of a page, and a page with
 % floats needs only 5% text.
 \renewcommand{\topfraction}{0.95}   % let figure take up nearly whole page
 \renewcommand{\textfraction}{0.05}  % let figure take up nearly whole page
 % Specify the dimensions of each page: 8.5in x 11in paper with a
 % 5.5in x 9.0in text block.
 \setlength{\paperheight}{11in}
 \setlength{\paperwidth}{8.5in}
 % Side margins are offsets added to LaTeX's built-in 1in origin, so with the
 % default \hoffset the text block sits 1.5in from the left paper edge.
 \oddsidemargin .5in    %   Note \oddsidemargin = \evensidemargin
 \evensidemargin .5in
 % Marginal notes are given an almost zero-width column.
 \marginparwidth 0.07 true in
 %\marginparwidth 0.75 true in
 %\topmargin 0 true pt           % Nominal distance from top of page to top of
 %\topmargin 0.125in
 % Negative top margin pulls the header up; \headsep is then widened by 0.25in.
 \topmargin -0.625in
 \addtolength{\headsep}{0.25in}
 \textheight 9.0 true in       % Height of text (including footnotes & figures)
 \textwidth 5.5 true in        % Width of text line.
 % Penalty 10000 forbids page breaks that would leave widow or club lines.
 \widowpenalty=10000
 \clubpenalty=10000
 % \thispagestyle{empty}        \pagestyle{empty}
 \flushbottom \sloppy
 % We're never going to need a table of contents, so just flush it to
 % save space --- suggested by drstrip@sandia-2
 % (\addcontentsline is redefined to swallow its three arguments and do nothing.)
 \def\addcontentsline#1#2#3{}
 % Title stuff, taken from deproc.
 % \maketitle typesets the title block once: inside a group it switches
 % footnote marks to symbols, calls \@maketitle (defined further down) and
 % emits the \thanks footnotes, then resets the footnote counter and
 % disables itself.
 \def\maketitle{\par
 \begingroup
 % Within the title block, footnotes are marked with symbols, not numbers.
   \def\thefootnote{\fnsymbol{footnote}}
   \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author
                                                        % name centering
 %   The footnote-mark was overlapping the footnote-text,
 %   added the following to fix this problem               (MK)
   \long\def\@makefntext##1{\parindent 1em\noindent
                            \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1}
 % Typeset the title/author box, then the accumulated \thanks footnotes.
   \@maketitle \@thanks
 \endgroup
 % Body footnotes start numbering from 1 again.
 \setcounter{footnote}{0}
 % Turn the title commands into no-ops and clear the stored title data.
 \let\maketitle\relax \let\@maketitle\relax
 \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
 % The toptitlebar has been raised to top-justify the first page
 % Running header: fancy page style with a 1.5pt head rule and all header
 % fields cleared; the left header text is set inside \@maketitle below.
 % NOTE(review): fancyhdr is already loaded with \RequirePackage earlier in
 % this file, so this second load looks redundant.
 \usepackage{fancyhdr}
 \pagestyle{fancy}
 \renewcommand{\headrulewidth}{1.5pt}
 \fancyhead{}
 % Title (includes both anonymized and non-anonymized versions)
 % The box is \textwidth wide and holds the title in \Large bold, followed by
 % an author block that depends on the package option:
 %   final      -> header "Published as a conference paper at COLM 2025", real authors
 %   preprint   -> header "Preprint. Under review.", real authors
 %   otherwise  -> header "Under review as a conference paper at COLM 2025",
 %                 fixed "Anonymous authors / Paper under double-blind review"
 % Each branch repeats the same \And / \AND definitions.  Both close the
 % current author tabular and open a new one; \And uses \linebreak[0]
 % (break only if needed), \AND uses \linebreak[4] (forced break).
 % \rule{\z@}{24pt} is a zero-width, 24pt-high strut at the start of each
 % author cell.
 \def\@maketitle{\vbox{\hsize\textwidth
 %\linewidth\hsize \vskip 0.1in \toptitlebar \centering
 {\Large\bf \@title\par}
 %\bottomtitlebar % \vskip 0.1in %  minus
 \ifcolmfinal
    \lhead{Published as a conference paper at COLM 2025}
    \def\And{\end{tabular}\hfil\linebreak[0]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
  \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
    \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}%
 \else\ifcolmpreprint
 \lhead{Preprint. Under review.}
 \def\And{\end{tabular}\hfil\linebreak[0]\hfil
        \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
 \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
        \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
 \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}%
 \else
 \lhead{Under review as a conference paper at COLM 2025}
   \def\And{\end{tabular}\hfil\linebreak[0]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
  \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
    \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}Anonymous authors\\Paper under double-blind review\end{tabular}%
 \fi\fi
 \vskip 0.3in minus 0.1in}}
 % Abstract: a centered, bold \large "Abstract" heading, with the body set
 % inside a quote environment.
 \renewenvironment{abstract}{\vskip.075in\centerline{\large\bf
 Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
 % Less leading in most fonts (due to the narrow columns)
 % The choices were between 1-pt and 1.5-pt leading
 %
 % Each size command below is redefined through \@setsize; the braced
 % dimension is the baseline skip and the following \...pt macros select the
 % font size (e.g. \normalsize uses \xpt on an 11pt baseline).
 % Note that \footnotesize is given the same settings as \small.
 %\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} % got rid of @ (MK)
 \def\normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
 \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
 \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
 \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
 \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
 \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
 \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
 \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
 \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
 \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
 % sections with less space
 %
 % Each heading is built with
 %   \@startsection{name}{level}{indent}{beforeskip}{afterskip}{style}
 % \section, \subsection and \subsubsection are given a negative beforeskip
 % and a positive afterskip, and are set ragged-right in bold (\large for
 % sections, \normalsize below that, italic added for subsubsections).
 % \paragraph and \subparagraph are given a negative afterskip (-1em), the
 % \@startsection convention for a run-in heading.
 \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
    -0.5ex minus -.2ex}{1.5ex plus 0.3ex
 minus0.2ex}{\large\bf\raggedright}}
 \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
 -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
 \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex
 plus      -0.5ex minus -.2ex}{0.5ex plus
 .2ex}{\normalsize\bf\itshape\raggedright}}
 \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
 \def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
  0.5ex minus .2ex}{-1em}{\normalsize\it}}
 % Not built on \@startsection: just a 5pt skip followed by \noindent (the
 % size and \raggedright declarations are confined to the braces).
 \def\subsubsubsection{\vskip
 5pt{\noindent\normalsize\raggedright}}
 % Footnotes
 \footnotesep 6.65pt %
 \skip\footins 9pt plus 4pt minus 2pt
 % 12pc-wide rule above the footnotes; the kerns (-3pt and 2.6pt) offset the
 % rule so it adds no net height, assuming the default 0.4pt rule thickness.
 \def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt }
 \setcounter{footnote}{0}
 % Lists and paragraphs
 % Paragraphs are not indented; they are separated by \parskip instead.
 \parindent 0pt
 \topsep 4pt plus 1pt minus 2pt
 \partopsep 1pt plus 0.5pt minus 0.5pt
 \itemsep 2pt plus 1pt minus 0.5pt
 \parsep 2pt plus 1pt minus 0.5pt
 \parskip .5pc
 %\leftmargin2em
 % Left margins per list nesting level: level 1 takes \leftmargin (3pc),
 % deeper levels shrink from 2em down to .5em.
 \leftmargin3pc
 \leftmargini\leftmargin \leftmarginii 2em
 \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
 %\labelsep \labelsep 5pt
 % Per-level list setup hooks.  Levels 2 and 3 also tighten the vertical
 % spacing; levels 4-6 only set the margin and label width.
 \def\@listi{\leftmargin\leftmargini}
 \def\@listii{\leftmargin\leftmarginii
   \labelwidth\leftmarginii\advance\labelwidth-\labelsep
   \topsep 2pt plus 1pt minus 0.5pt
   \parsep 1pt plus 0.5pt minus 0.5pt
   \itemsep \parsep}
 \def\@listiii{\leftmargin\leftmarginiii
    \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
    \topsep 1pt plus 0.5pt minus 0.5pt
    \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
    \itemsep \topsep}
 \def\@listiv{\leftmargin\leftmarginiv
     \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
 \def\@listv{\leftmargin\leftmarginv
     \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
 \def\@listvi{\leftmargin\leftmarginvi
     \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
 % Vertical space around displayed equations.
 \abovedisplayskip 7pt plus2pt minus5pt%
 \belowdisplayskip \abovedisplayskip
 \abovedisplayshortskip  0pt plus3pt%
 \belowdisplayshortskip  4pt plus3pt minus3pt%
 % Horizontal rules for framing the title (4pt above, 1pt below).  Within
 % this file they are referenced only from commented-out lines in \@maketitle.
 \def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip}
 \def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip
 .09in} %
 %Reduced second vskip to compensate for adding the strut in \@author
--- a/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.tex
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/colm2025_conference.tex
@ -0,0 +1,305 @@
 \documentclass{article} % For LaTeX2e
 \usepackage[submission]{colm2025_conference}
 \usepackage{microtype}
 \usepackage{hyperref}
 \usepackage{url}
 \usepackage{booktabs}
 \usepackage{lineno}
 \definecolor{darkblue}{rgb}{0, 0, 0.5}
 \hypersetup{colorlinks=true, citecolor=darkblue, linkcolor=darkblue, urlcolor=darkblue}
 \title{Formatting Instructions for COLM 2025 \\ Conference Submissions}
 % Authors must not appear in the submitted version. They should be hidden
 % as long as the \colmfinalcopy macro remains commented out below.
 % Non-anonymous submissions will be rejected without review.
 \author{Antiquus S.~Hippocampus, Natalia Cerebro \& Amelie P. Amygdale \thanks{ Use footnote for providing further information
 about author (webpage, alternative address)---\emph{not} for acknowledging
 funding agencies.  Funding acknowledgements go at the end of the paper.} \\
 Department of Computer Science\\
 Cranberry-Lemon University\\
 Pittsburgh, PA 15213, USA \\
 \texttt{\{hippo,brain,jen\}@cs.cranberry-lemon.edu} \\
 \And
 Ji Q. Ren \& Yevgeny LeNet \\
 Department of Computational Neuroscience \\
 University of the Witwatersrand \\
 Joburg, South Africa \\
 \texttt{\{robot,net\}@wits.ac.za} \\
 \AND
 Coauthor \\
 Affiliation \\
 Address \\
 \texttt{email}
 }
 % The \author macro works with any number of authors. There are two commands
 % used to separate the names and addresses of multiple authors: \And and \AND.
 %
 % Using \And between authors leaves it to \LaTeX{} to determine where to break
 % the lines. Using \AND forces a linebreak at that point. So, if \LaTeX{}
 % puts 3 of 4 authors names on the first line, and the last on the second
 % line, try using \AND instead of \And before the third author name.
 \newcommand{\fix}{\marginpar{FIX}}
 \newcommand{\new}{\marginpar{NEW}}
 \begin{document}
 \ifcolmsubmission
 \linenumbers
 \fi
 \maketitle
 \begin{abstract}
 The abstract paragraph should be indented 1/2~inch (3~picas) on both left and
 right-hand margins. Use 10~point type, with a vertical spacing of 11~points.
 The word \textit{Abstract} must be centered and in point size 12. Two
 line spaces precede the abstract. The abstract must be limited to one
 paragraph.
 \end{abstract}
 \section{Submission of conference papers to COLM 2025}
 COLM requires electronic submissions, processed by
 \url{https://openreview.net/}. See COLM's website for more instructions.
 The format for the submissions is a variant of the NeurIPS and ICLR formats.
 Please read carefully the instructions below, and follow them
 faithfully.
 \subsection{Style}
 Papers to be submitted to COLM 2025 must be prepared according to the
 instructions presented here.
 %% Please note that we have introduced automatic line number generation
 %% into the style file for \LaTeXe. This is to help reviewers
 %% refer to specific lines of the paper when they make their comments. Please do
 %% NOT refer to these line numbers in your paper as they will be removed from the
 %% style file for the final version of accepted papers.
 Authors are required to use the COLM \LaTeX{} style files obtainable at the
 COLM website. Please make sure you use the current files and
 not previous versions. Tweaking the style files may be grounds for rejection.
 \subsubsection{Copy Options}
 If your paper is ultimately accepted, the option {\tt
  final} should be set for the {\tt {\textbackslash}usepackage[submission]\{colm2025\_conference\}} command for the camera-ready version. The {\tt submission} option is the default, and is to be used for all submissions during the review process. It also turns on the line numbers. If you wish to submit a preprint, the option {\tt preprint} should be used.
 \subsection{Retrieval of style files}
 The style files for COLM and other conference information are available online at:
 \begin{center}
   \url{http://www.colmweb.org/}
 \end{center}
 The file \verb+colm2025_conference.pdf+ contains these
 instructions and illustrates the
 various formatting requirements your COLM paper must satisfy.
 Submissions must be made using \LaTeX{} and the style files
 \verb+colm2025_conference.sty+ and \verb+colm2025_conference.bst+ (to be used with \LaTeX{}2e). The file
 \verb+colm2025_conference.tex+ may be used as a ``shell'' for writing your paper. All you
 have to do is replace the author, title, abstract, and text of the paper with
 your own.
 The formatting instructions contained in these style files are summarized in
 sections \ref{gen_inst}, \ref{headings}, and \ref{others} below.
 \section{General formatting instructions}
 \label{gen_inst}
 The text must be confined within a rectangle 5.5~inches (33~picas) wide and
 9~inches (54~picas) long. The left margin is 1.5~inch (9~picas).
 Use 10~point type with a vertical spacing of 11~points. Palatino is the
 preferred typeface throughout, and is mandatory for the main text. Paragraphs are separated by 1/2~line space, with no indentation. 
 Paper title is 17~point and left-aligned.
 All pages should start at 1~inch (6~picas) from the top of the page.
 Please verify that any custom header information you may add does not override the style defined in this document. This has been known to occur especially when submissions are converted to a new template from a previous one (i.e., for re-submission to a different venue). 
 Authors' names are
 set in boldface, and each name is placed above its corresponding
 address. The lead author's name is to be listed first, and
 the co-authors' names are set to follow. Authors sharing the
 same address can be on the same line.
 Please pay special attention to the instructions in section \ref{others}
 regarding figures, tables, acknowledgements, and references.
 There will be a strict upper limit of 9 pages for the main text of the initial submission, with unlimited additional pages for citations. 
 We strongly recommend following arXiv's guidelines for making your paper friendly for HTML conversion: \url{https://info.arxiv.org/help/submit_latex_best_practices.html}.
 \section{Headings: first level}
 \label{headings}
 First level headings are in lower case (except for first word and proper nouns), bold face,
 flush left and in point size 12. One line space before the first level
 heading and 1/2~line space after the first level heading.
 \subsection{Headings: second level}
 Second level headings are in lower case (except for first word and proper nouns), bold face,
 flush left and in point size 10. One line space before the second level
 heading and 1/2~line space after the second level heading.
 \subsubsection{Headings: third level}
 Third level headings are in lower case (except for first word and proper nouns), bold face, italics, 
 flush left and in point size 10. One line space before the third level
 heading and 1/2~line space after the third level heading.
 \section{Citations, figures, tables, references}\label{others}
 These instructions apply to everyone, regardless of the formatter being used.
 \subsection{Citations within the text}
 Citations within the text should be based on the \texttt{natbib} package
 and include the authors' last names and year (with the ``et~al.'' construct
 for more than two authors). When the authors or the publication are
 included in the sentence, the citation should not be in parentheses using \verb|\citet{}| (as
 in ``See \citet{Vaswani+2017} for more information.''). Otherwise, the citation
 should be in parentheses using \verb|\citep{}| (as in ``Transformers are a key tool
 for developing language models~\citep{Vaswani+2017}.'').
 The corresponding references are to be listed in alphabetical order of
 authors, in the \textsc{References} section. As to the format of the
 references themselves, any style is acceptable as long as it is used
 consistently.
 \subsection{Footnotes}
 Indicate footnotes with a number\footnote{Sample of the first footnote} in the
 text. Place the footnotes at the bottom of the page on which they appear.
 Precede the footnote with a horizontal rule of 2~inches
 (12~picas).\footnote{Sample of the second footnote}
 \subsection{Figures}
 All artwork must be neat, clean, and legible. Lines should be dark
 enough for purposes of reproduction; art work should not be
 hand-drawn. Any text within the figure must be readable. Please do not use font sizes below {\tt small}. We strongly recommend using vector representations (e.g., pdf or svg) for all diagrams. 
 We strongly recommend positioning all figures at the top or bottom of the page.
 The figure number and caption always appear below the figure. Place one line space before the figure caption, and one line space after the figure. The figure caption is lower case (except for first word and proper nouns); figures are numbered consecutively.
 Make sure the figure caption does not get separated from the figure.
 Leave sufficient space to avoid splitting the figure and figure caption.
 You may use color figures.
 However, it is best for the
 figure captions and the paper body to make sense if the paper is printed
 either in black/white or in color.
 \begin{figure}[t]
 \begin{center}
 %\framebox[4.0in]{$\;$}
 \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
 \end{center}
 \caption{Sample figure caption.}
 \end{figure}
 \subsection{Tables}
 All tables must be centered, neat, clean and legible. Do not use hand-drawn tables. The table number and title always appear below the table. See Table~\ref{sample-table}. Please do not use font sizes below {\tt small} in tables. We recommend using {\tt booktabs} or a similar package to style tables. 
 We strongly recommend positioning all tables at the top or bottom of the page.
 Place one line space before the table title, one line space after the table title, and one line space after the table. The table title must be lowercase (except for first word and proper nouns); tables are numbered consecutively.
 \begin{table}[t]
 \begin{center}
 \begin{tabular}{ll}
 \toprule
 \multicolumn{1}{c}{\bf PART}  &\multicolumn{1}{c}{\bf DESCRIPTION} \\
 \midrule
 Dendrite         &Input terminal \\
 Axon             &Output terminal \\
 Soma             &Cell body (contains cell nucleus) \\
 \bottomrule
 \end{tabular}
 \end{center}
 \caption{Sample table title}\label{sample-table}
 \end{table}
 \section{Final instructions}
 Do not change any aspects of the formatting parameters in the style files.
 In particular, do not modify the width or length of the rectangle the text
 should fit into, and do not change font sizes (except perhaps in the
 \textsc{References} section; see below). Please note that pages should be
 numbered.
 \section{Preparing PostScript or PDF files}
 Please prepare PostScript or PDF files with paper size ``US Letter'', and
 not, for example, ``A4''. The -t
 letter option on dvips will produce US Letter files.
 Consider directly generating PDF files using \verb+pdflatex+
 (especially if you are a MiKTeX user).
 PDF figures must be substituted for EPS figures, however.
 Otherwise, please generate your PostScript and PDF files with the following commands:
 \begin{verbatim}
 dvips mypaper.dvi -t letter -Ppdf -G0 -o mypaper.ps
 ps2pdf mypaper.ps mypaper.pdf
 \end{verbatim}
 \subsection{Margins in LaTeX}
 Most of the margin problems come from figures positioned by hand using
 \verb+\special+ or other commands. We suggest using the command
 \verb+\includegraphics+
 from the graphicx package. Always specify the figure width as a multiple of
 the line width as in the example below using .eps graphics
 \begin{verbatim}
   \usepackage[dvips]{graphicx} ...
   \includegraphics[width=0.8\linewidth]{myfile.eps}
 \end{verbatim}
 or % Apr 2009 addition
 \begin{verbatim}
   \usepackage[pdftex]{graphicx} ...
   \includegraphics[width=0.8\linewidth]{myfile.pdf}
 \end{verbatim}
 for .pdf graphics.
 See section~4.4 in the graphics bundle documentation (\url{http://www.ctan.org/tex-archive/macros/latex/required/graphics/grfguide.ps}).
 A number of width problems arise when LaTeX cannot properly hyphenate a
 line. Please give LaTeX hyphenation hints using the \verb+\-+ command.
 \section*{Author Contributions}
 If you'd like to, you may include  a section for author contributions as is done
 in many journals. This is optional and at the discretion of the authors.
 \section*{Acknowledgments}
 Use unnumbered first level headings for the acknowledgments. All
 acknowledgments, including those to funding agencies, go at the end of the paper.
 \section*{Ethics Statement}
 Authors can add an optional ethics statement to the paper. 
 For papers that touch on ethical issues, this section will be evaluated as part of the review process. The ethics statement should come at the end of the paper. It does not count toward the page limit, but should not be more than 1 page. 
 \bibliography{colm2025_conference}
 \bibliographystyle{colm2025_conference}
 \appendix
 \section{Appendix}
 You may include other additional sections here.
 \end{document}
--- a/skills/mlops/ml-paper-writing/templates/colm2025/fancyhdr.sty
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/fancyhdr.sty
@ -0,0 +1,485 @@
 % fancyhdr.sty version 3.2
 % Fancy headers and footers for LaTeX.
 % Piet van Oostrum, 
 % Dept of Computer and Information Sciences, University of Utrecht,
 % Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
 % Telephone: +31 30 2532180. Email: piet@cs.uu.nl
 % ========================================================================
 % LICENCE:
 % This file may be distributed under the terms of the LaTeX Project Public
 % License, as described in lppl.txt in the base LaTeX distribution.
 % Either version 1 or, at your option, any later version.
 % ========================================================================
 % MODIFICATION HISTORY:
 % Sep 16, 1994
 % version 1.4: Correction for use with \reversemargin
 % Sep 29, 1994:
 % version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
 % Oct 4, 1994:
 % version 1.6: Reset single spacing in headers/footers for use with
 % setspace.sty or doublespace.sty
 % Oct 4, 1994:
 % version 1.7: changed \let\@mkboth\markboth to
 % \def\@mkboth{\protect\markboth} to make it more robust
 % Dec 5, 1994:
 % version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
 % importantly) use the \chapter/sectionmark definitions from ps@headings if
 % they exist (which should be true for all standard classes).
 % May 31, 1995:
 % version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
 % construction in the doc did not work properly with the fancyplain style. 
 % June 1, 1995:
 % version 1.91: The definition of \@mkboth wasn't restored on subsequent
 % \pagestyle{fancy}'s.
 % June 1, 1995:
 % version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
 % \pagestyle{fancy} would erroneously select the plain version.
 % June 1, 1995:
 % version 1.93: \fancypagestyle command added.
 % Dec 11, 1995:
 % version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
 % CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
 % position (old hardcoded value of .3\normalbaselineskip is far too high
 % when used with very small footer fonts).
 % Jan 31, 1996:
 % version 1.95: call \@normalsize in the reset code if that is defined,
 % otherwise \normalsize.
 % this is to solve a problem with ucthesis.cls, as this doesn't
 % define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
 % work as this is optimized to do very little, so there \@normalsize should
 % be called. Hopefully this code works for all versions of LaTeX known to
 % mankind.  
 % April 25, 1996:
 % version 1.96: initialize \headwidth to a magic (negative) value to catch
 % most common cases that people change it before calling \pagestyle{fancy}.
 % Note it can't be initialized when reading in this file, because
 % \textwidth could be changed afterwards. This is quite probable.
 % We also switch to \MakeUppercase rather than \uppercase and introduce a
 % \nouppercase command for use in headers and footers.
 % May 3, 1996:
 % version 1.97: Two changes:
 % 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
 % for the chapter and section marks. The current version of amsbook and
 % amsart classes don't seem to need them anymore. Moreover the standard
 % latex classes don't use \markboth if twoside isn't selected, and this is
 % confusing as \leftmark doesn't work as expected.
 % 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
 % in the amsbook and amsart classes, that make global changes to \topskip,
 % which are reset in \ps@empty. Hopefully this doesn't break other things.
 % May 7, 1996:
 % version 1.98:
 % Added % after the line  \def\nouppercase
 % May 7, 1996:
 % version 1.99: This is the alpha version of fancyhdr 2.0
 % Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
 % Changed \headrulewidth, \footrulewidth, \footruleskip to
 % macros rather than length parameters, In this way they can be
 % conditionalized and they don't consume length registers. There is no need
 % to have them as length registers unless you want to do calculations with
 % them, which is unlikely. Note that this may make some uses of them
 % incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
 % May 10, 1996:
 % version 1.99a:
 % Added a few more % signs
 % May 10, 1996:
 % version 1.99b:
 % Changed the syntax of \f@nfor to be resistant to catcode changes of :=
 % Removed the [1] from the defs of \lhead etc. because the parameter is
 % consumed by the \@[xy]lhead etc. macros.
 % June 24, 1997:
 % version 1.99c:
 % corrected \nouppercase to also include the protected form of \MakeUppercase
 % \global added to manipulation of \headwidth.
 % \iffootnote command added.
 % Some comments added about \@fancyhead and \@fancyfoot.
 % Aug 24, 1998
 % version 1.99d
 % Changed the default \ps@empty to \ps@@empty in order to allow
 % \fancypagestyle{empty} redefinition.
 % Oct 11, 2000
 % version 2.0
 % Added LPPL license clause.
 %
 % A check for \headheight is added. An errormessage is given (once) if the
 % header is too large. Empty headers don't generate the error even if
 % \headheight is very small or even 0pt. 
 % Warning added for the use of 'E' option when twoside option is not used.
 % In this case the 'E' fields will never be used.
 %
 % Mar 10, 2002
 % version 2.1beta
 % New command: \fancyhfoffset[place]{length}
 % defines offsets to be applied to the header/footer to let it stick into
 % the margins (if length > 0).
 % place is like in fancyhead, except that only E,O,L,R can be used.
 % This replaces the old calculation based on \headwidth and the marginpar
 % area.
 % \headwidth will be dynamically calculated in the headers/footers when
 % this is used.
 %
 % Mar 26, 2002
 % version 2.1beta2
 % \fancyhfoffset now also takes h,f as possible letters in the argument to
 % allow the header and footer widths to be different.
 % New commands \fancyheadoffset and \fancyfootoffset added comparable to
 % \fancyhead and \fancyfoot.
 % Errormessages and warnings have been made more informative.
 %
 % Dec 9, 2002
 % version 2.1
 % The defaults for \footrulewidth, \plainheadrulewidth and
 % \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
 % someone inadvertently uses \setlength to change any of these, the value
 % of \z@skip will not be changed, rather an errormessage will be given.
 % March 3, 2004
 % Release of version 3.0
 % Oct 7, 2004
 % version 3.1
 % Added '\endlinechar=13' to \fancy@reset to prevent problems with
 % includegraphics in header when verbatiminput is active.
 % March 22, 2005
 % version 3.2
 % reset \everypar (the real one) in \fancy@reset because spanish.ldf does
 % strange things with \everypar between << and >>.
 % \ifancy@mpty{<text>}: stores <text> in \temp@a and opens an \ifx test
 % against \@empty; the caller supplies the matching \else and \fi.
 \def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
 % \fancy@def<cs>{<text>}: defines <cs> as \leavevmode when <text> is empty
 % and as <text>\strut otherwise. Each definition is prefixed with
 % \fancy@gbl, which is set to \global just below.
 \def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
                                   \fancy@gbl\def#1{#2\strut}\fi}
 \let\fancy@gbl\global
 % \@fancyerrmsg{<message>}: reports an error through \PackageError when
 % that command is defined, and through \errmessage otherwise.
 \def\@fancyerrmsg#1{%
        \ifx\PackageError\undefined
        \errmessage{#1}\else
        \PackageError{Fancyhdr}{#1}{}\fi}
 % \@fancywarning{<message>}: issues a \PackageWarning when that command is
 % defined. The fallback branch calls \errmessage, so without
 % \PackageWarning the message is raised as an error rather than a warning.
 \def\@fancywarning#1{%
        \ifx\PackageWarning\undefined
        \errmessage{#1}\else
        \PackageWarning{Fancyhdr}{#1}{}\fi}
 % Usage: \@forc \var{charstring}{command to be executed for each char}
 % This is similar to LaTeX's \@tfor, but expands the charstring.
 % \@forc expands the first token of the charstring once (\expandafter
 % chain) and hands over to \f@rc, which stops when the string is empty.
 % \f@@rc splits off the first token, stores it in \var, executes the
 % command, and calls \f@rc again on the remainder.
 \def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
 \def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
                                    \f@@rc#1#2\f@@rc{#3}\fi}
 \def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
 % Usage: \f@nfor\name{list}{body}
 % (three ordinary arguments; the callers below invoke it this way).
 % Like LaTeX's \@for but an empty list is treated as a list with an empty
 % element. The list is passed to LaTeX's \@forloop with a `,\@nil,\@nil'
 % terminator appended.
 \newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
    \expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
 % Usage: \def@ult \cs{defaults}{argument}
 % sets \cs to the characters from defaults appearing in argument
 % or defaults if it would be empty. All characters are lowercased.
 % Steps: the argument is expanded and lowercased into \temp@a; \cs is
 % cleared; each character of defaults is tested against \temp@a with
 % \if@in and appended to \cs on a match; if \cs is still empty it is set
 % to the full defaults string.
 \newcommand\def@ult[3]{%
    \edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
    \def#1{}%
    \@forc\tmpf@ra{#2}%
        {\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
    \ifx\@empty#1\def#1{#2}\fi}
 %
 % \if@in <char><set><truecase><falsecase>
 %
 % Executes <truecase> if <char> occurs in <set>, <falsecase> otherwise.
 % Method: <set><char> is split at the first occurrence of <char>; if the
 % part before it equals the whole (expanded) <set>, then <char> was not in
 % <set> and <falsecase> is chosen.
 \newcommand{\if@in}[4]{%
    \edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
    \expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
 % User-level field commands. Each looks for an optional [places] argument
 % (\@ifnextchar[), supplies an empty one when it is absent, and forwards
 % to \f@ncyhf together with its own name (used in messages) and a fixed
 % selector: h for \fancyhead, f for \fancyfoot, empty for \fancyhf.
 \newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
                                     {\f@ncyhf\fancyhead h[]}}
 \newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
                                     {\f@ncyhf\fancyfoot f[]}}
 \newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
                                   {\f@ncyhf\fancyhf{}[]}}
 % New commands for offsets added
 % Same argument handling as above, forwarding to \f@ncyhfoffs.
 \newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
                                           {\f@ncyhfoffs\fancyheadoffset h[]}}
 \newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
                                           {\f@ncyhfoffs\fancyfootoffset f[]}}
 \newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
                                         {\f@ncyhfoffs\fancyhfoffset{}[]}}
 % The header and footer fields are stored in command sequences with
 % names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
 % and <z> from [hf].
 %
 % \f@ncyhf<caller><sel>[<places>]{<text>}
 %   #1  the calling user command, used only in messages
 %   #2  fixed h/f selector prepended to every place specification
 %   #3  comma-separated place specifications
 %   #4  the field text
 % First every character of #3 that is not in `eolcrhf,EOLCRHF' is
 % collected in \temp@c and an error is raised if any was found. Then, for
 % each comma-separated specification, \def@ult derives the e/o, l/c/r and
 % h/f selections (a warning is given for `e' without twoside), and
 % \fancy@def stores #4 in every selected \f@ncy<x><y><z>.
 \def\f@ncyhf#1#2[#3]#4{%
    \def\temp@c{}%
    \@forc\tmpf@ra{#3}%
        {\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
            {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
    \ifx\@empty\temp@c\else
        \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
          [#3]}%
    \fi
    \f@nfor\temp@c{#3}%
        {\def@ult\f@@@eo{eo}\temp@c
         \if@twoside\else
           \if\f@@@eo e\@fancywarning
             {\string#1's `E' option without twoside option is useless}\fi\fi
         \def@ult\f@@@lcr{lcr}\temp@c
         \def@ult\f@@@hf{hf}{#2\temp@c}%
         \@forc\f@@eo\f@@@eo
             {\@forc\f@@lcr\f@@@lcr
                 {\@forc\f@@hf\f@@@hf
                     {\expandafter\fancy@def\csname
                      f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
                      {#4}}}}}}
 % \f@ncyhfoffs<caller><sel>[<places>]{<length>}
 % Offset counterpart of \f@ncyhf, with the same argument layout. The
 % accepted place characters are `eolrhf,EOLRHF' (no `c'), the side
 % selection defaults to `lr', and each selected length register
 % \f@ncyO@<x><y><z> is set to #4 with \setlength. Afterwards
 % \fancy@setoffs is called.
 \def\f@ncyhfoffs#1#2[#3]#4{%
    \def\temp@c{}%
    \@forc\tmpf@ra{#3}%
        {\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
            {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
    \ifx\@empty\temp@c\else
        \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
          [#3]}%
    \fi
    \f@nfor\temp@c{#3}%
        {\def@ult\f@@@eo{eo}\temp@c
         \if@twoside\else
           \if\f@@@eo e\@fancywarning
             {\string#1's `E' option without twoside option is useless}\fi\fi
         \def@ult\f@@@lcr{lr}\temp@c
         \def@ult\f@@@hf{hf}{#2\temp@c}%
         \@forc\f@@eo\f@@@eo
             {\@forc\f@@lcr\f@@@lcr
                 {\@forc\f@@hf\f@@@hf
                     {\expandafter\setlength\csname
                      f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
                      {#4}}}}}%
     \fancy@setoffs}
 % Fancyheadings version 1 commands. These are more or less deprecated,
 % but they continue to work.
 % Each command accepts \cmd[<even-page text>]{<odd-page text>} or
 % \cmd{<text>}. With the optional argument the even-page field receives it
 % and the odd-page field receives the mandatory text; without it both
 % fields receive the same text. All fields are stored through \fancy@def.
 \newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
 \def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
 \def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
 \newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
 \def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
 \def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
 \newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
 \def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
 \def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
 \newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
 \def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
 \def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
 \newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
 \def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
 \def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
 \newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
 \def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
 \def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
 % Width register for headers and footers; \headwidth is an alias for it.
 \newlength{\fancy@headwidth}
 \let\headwidth\fancy@headwidth
 % Offset registers written by \f@ncyhfoffs, named \f@ncyO@<e|o><l|r><h|f>.
 \newlength{\f@ncyO@elh}
 \newlength{\f@ncyO@erh}
 \newlength{\f@ncyO@olh}
 \newlength{\f@ncyO@orh}
 \newlength{\f@ncyO@elf}
 \newlength{\f@ncyO@erf}
 \newlength{\f@ncyO@olf}
 \newlength{\f@ncyO@orf}
 % Rule parameters are macros, not length registers.
 \newcommand{\headrulewidth}{0.4pt}
 \newcommand{\footrulewidth}{0pt}
 \newcommand{\footruleskip}{.3\normalbaselineskip}
 % Fancyplain stuff shouldn't be used anymore (rather
 % \fancypagestyle{plain} should be used), but it must be present for
 % compatibility reasons.
 \newcommand{\plainheadrulewidth}{0pt}
 \newcommand{\plainfootrulewidth}{0pt}
 \newif\if@fancyplain \@fancyplainfalse
 % \fancyplain{<plain-page text>}{<other text>} selects on \if@fancyplain.
 \def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
 % Recognisable negative start value; \ps@fancy tests for \headwidth<0sp
 % and then adds this constant back together with \textwidth.
 \headwidth=-123456789sp %magic constant
 % Command to reset various things in the headers:
 % a.o.  single spacing (taken from setspace.sty)
 % and the catcode of ^^M (so that epsf files in the header work if a
 % verbatim crosses a page boundary)
 % It also defines a \nouppercase command that disables \uppercase and
 % \MakeUppercase. It can only be used in the headers and footers.
 % In order, \fancy@reset: clears the saved real \everypar, calls
 % \restorecr, sets \endlinechar to 13, sets \baselinestretch to 1, defines
 % \nouppercase, and finally re-selects the normal font size through
 % \@newbaseline, \@normalsize or \normalsize, whichever is available.
 \let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
 \def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
 \def\baselinestretch{1}%
 \def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
     \expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
 \ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
   \ifx\@normalsize\undefined \normalsize % for ucthesis.cls
   \else \@normalsize \fi
 \else% NFSS (2.09) present
  \@newbaseline%
 \fi}
 % Initialization of the head and foot text.
 % The default values still contain \fancyplain for compatibility.
 \fancyhf{} % clear all
 % lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
 % righthead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
 \if@twoside
  \fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
  \fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
 \else
  \fancyhead[l]{\fancyplain{}{\sl\rightmark}}
  \fancyhead[r]{\fancyplain{}{\sl\leftmark}}
 \fi
 \fancyfoot[c]{\rm\thepage} % page number
 % Use box 0 as a temp box and dimen 0 as temp dimen.
 % This can be done, because this code will always
 % be used inside another box, and therefore the changes are local.
 %
 % \@fancyvbox<length>{<material>}: sets <material> in a \vbox (box 0). If
 % the box is taller than <length>, a warning is issued, <length> is
 % globally set to the box height, and the box height is reset to the old
 % value of <length>. The box is then output.
 \def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
  {\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
    We now make it that large for the rest of the document.^^J
    This may cause the page layout to be inconsistent, however\@gobble}%
  \dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
  \box0}
 % Put together a header or footer given the left, center and
 % right text, fillers at left and right and a rule.
 % The \lap commands put the text into an hbox of zero size,
 % so overlapping text does not generate an errormessage.
 % These macros have 5 parameters:
 % 1. LEFTSIDE BEARING % This determines at which side the header will stick
 %    out. When \fancyhfoffset is used this calculates \headwidth, otherwise
 %    it is \hss or \relax (after expansion).
 % 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
 % 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
 % 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
 % 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
 %
 % \@fancyhead checks the material against \headheight, uses bottom-aligned
 % parboxes and puts \headrule after the text; \@fancyfoot checks against
 % \footskip, uses top-aligned parboxes and puts \footrule before the text.
 % Both call \fancy@reset first.
 \def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
  \@fancyvbox\headheight{\hbox
    {\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
      \parbox[b]{\headwidth}{\centering#3}\hfill
      \llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
 \def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
    \@fancyvbox\footskip{\footrule
      \hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
        \parbox[t]{\headwidth}{\centering#3}\hfill
        \llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
 % \headrule: rule of thickness \headrulewidth and width \headwidth,
 % followed by a negative skip of the same thickness. On fancyplain pages
 % (\if@fancyplain) \plainheadrulewidth is substituted; the extra brace
 % group keeps that substitution local.
 \def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
    \hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
 % \footrule: skips back by \footruleskip and \footrulewidth, draws the
 % rule, then skips forward by \footruleskip. \plainfootrulewidth is
 % substituted on fancyplain pages, again locally.
 \def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
    \vskip-\footruleskip\vskip-\footrulewidth
    \hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
 % \ps@fancy: the definition behind \pagestyle{fancy}. When first executed
 % it defines \@chapapp and \MakeUppercase if they are missing, installs
 % mark macros (\sectionmark/\subsectionmark when \chapter is undefined,
 % \chaptermark/\sectionmark otherwise), activates the layout through
 % \ps@@fancy, and globally redefines itself as \@fancyplainfalse\ps@@fancy.
 % It ends by repairing a negative \headwidth.
 \def\ps@fancy{%
 \@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
 %
 % Define \MakeUppercase for old LaTeXen.
 % Note: we used \def rather than \let, so that \let\uppercase\relax (from
 % the version 1 documentation) will still work.
 %
 \@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
 \@ifundefined{chapter}{\def\sectionmark##1{\markboth
 {\MakeUppercase{\ifnum \c@secnumdepth>\z@
 \thesection\hskip 1em\relax \fi ##1}}{}}%
 \def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
 \thesubsection\hskip 1em\relax \fi ##1}}}%
 {\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
 \@chapapp\ \thechapter. \ \fi ##1}}{}}%
 \def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
 \thesection. \ \fi ##1}}}}%
 %\csname ps@headings\endcsname % use \ps@headings defaults if they exist
 \ps@@fancy
 \gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
 % Initialize \headwidth if the user didn't
 %
 \ifdim\headwidth<0sp
 %
 % This catches the case that \headwidth hasn't been initialized and the
 % case that the user added something to \headwidth in the expectation that
 % it was initialized to \textwidth. We compensate this now. This loses if
 % the user intended to multiply it by a factor. But that case is more
 % likely done by saying something like \headwidth=1.2\textwidth. 
 % The doc says you have to change \headwidth after the first call to
 % \pagestyle{fancy}. This code is just to catch the most common cases where
 % that requirement is violated.
 %
    \global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
 \fi}
 % \ps@fancyplain: runs \ps@fancy and then makes the plain page style point
 % to \ps@plain@fancy.
 \def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
 % \ps@plain@fancy: the fancy layout with the fancyplain flag switched on.
 \def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
 % Keep the current \ps@empty under a private name.
 \let\ps@@empty\ps@empty
 % \ps@@fancy: installs the four LaTeX page hooks. Each one calls
 % \@fancyhead or \@fancyfoot with the left bearing, the left/center/right
 % fields and the right bearing for that page parity.
 \def\ps@@fancy{%
 \ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
 \def\@mkboth{\protect\markboth}%
 \def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
 \def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
 \def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
 \def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
 }
 % Default definitions for compatibility mode:
 % These cause the header/footer to take the defined \headwidth as width
 % And to shift in the direction of the marginpar area
 % Odd-page left bearing: \hss under \if@reversemargin, otherwise \relax;
 % the odd-page right bearing is the opposite. Even pages swap the two, and
 % the footer bearings copy the header ones.
 \def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
 \def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
 \let\fancy@Oelh\fancy@Oorh
 \let\fancy@Oerh\fancy@Oolh
 \let\fancy@Oolf\fancy@Oolh
 \let\fancy@Oorf\fancy@Oorh
 \let\fancy@Oelf\fancy@Oelh
 \let\fancy@Oerf\fancy@Oerh
 % New definitions for the use of \fancyhfoffset
 % These calculate the \headwidth from \textwidth and the specified offsets.
 % Each \fancy@offs<e|o><l><h|f> sets \headwidth to \textwidth plus the
 % left and right offsets for that page parity and position, then skips
 % left by the left offset.
 \def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
                   \advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
 \def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
                   \advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
 \def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
                   \advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
 \def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
                   \advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
 % \fancy@setoffs: switches the bearings to offset mode. Left bearings
 % become the \fancy@offs... macros above and right bearings become \hss.
 % Every assignment is prefixed with \fancy@gbl.
 \def\fancy@setoffs{%
 % Just in case \let\headwidth\textwidth was used
  \fancy@gbl\let\headwidth\fancy@headwidth
  \fancy@gbl\let\fancy@Oolh\fancy@offsolh
  \fancy@gbl\let\fancy@Oelh\fancy@offselh
  \fancy@gbl\let\fancy@Oorh\hss
  \fancy@gbl\let\fancy@Oerh\hss
  \fancy@gbl\let\fancy@Oolf\fancy@offsolf
  \fancy@gbl\let\fancy@Oelf\fancy@offself
  \fancy@gbl\let\fancy@Oorf\hss
  \fancy@gbl\let\fancy@Oerf\hss}
 % \iffootnote: true when the column being built has footnote material.
 \newif\iffootnote
 % Wrap LaTeX's \@makecol: before the original runs, record whether there
 % is footnote material and take a copy of the pending top and bottom float
 % lists, so that headers and footers can test them.
 \let\latex@makecol\@makecol
 % \footins is void when no footnote has been inserted, so the void case
 % must clear the flag and the non-void case must set it.
 \def\@makecol{\ifvoid\footins\footnotefalse\else\footnotetrue\fi
 \let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
 % \iftopfloat{<A>}{<B>}: <A> when \topfloat (the copy of \@toplist taken in
 % \@makecol) is not empty, <B> otherwise.
 \def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
 % \ifbotfloat{<A>}{<B>}: the same test on \botfloat (copy of \@botlist).
 \def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
 % \iffloatpage{<A>}{<B>}: <A> when \if@fcolmade is true, <B> otherwise.
 \def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
 % \fancypagestyle{<name>}{<setup>}: defines the page style \ps@<name>.
 % When selected it sets \fancy@gbl to \relax (so the field definitions
 % made by <setup> are not prefixed with \global), runs <setup>, and then
 % runs \ps@fancy.
 \newcommand{\fancypagestyle}[2]{%
  \@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
--- a/skills/mlops/ml-paper-writing/templates/colm2025/math_commands.tex
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/math_commands.tex
@ -0,0 +1,508 @@
 %%%%% NEW MATH DEFINITIONS %%%%%
 \usepackage{amsmath,amsfonts,bm}
 % Mark sections of captions for referring to divisions of figures
 \newcommand{\figleft}{{\em (Left)}}
 \newcommand{\figcenter}{{\em (Center)}}
 \newcommand{\figright}{{\em (Right)}}
 \newcommand{\figtop}{{\em (Top)}}
 \newcommand{\figbottom}{{\em (Bottom)}}
 \newcommand{\captiona}{{\em (a)}}
 \newcommand{\captionb}{{\em (b)}}
 \newcommand{\captionc}{{\em (c)}}
 \newcommand{\captiond}{{\em (d)}}
 % Highlight a newly defined term
 \newcommand{\newterm}[1]{{\bf #1}}
 % Figure reference, lower-case.
 \def\figref#1{figure~\ref{#1}}
 % Figure reference, capital. For start of sentence
 \def\Figref#1{Figure~\ref{#1}}
 \def\twofigref#1#2{figures \ref{#1} and \ref{#2}}
 \def\quadfigref#1#2#3#4{figures \ref{#1}, \ref{#2}, \ref{#3} and \ref{#4}}
 % Section reference, lower-case.
 \def\secref#1{section~\ref{#1}}
 % Section reference, capital.
 \def\Secref#1{Section~\ref{#1}}
 % Reference to two sections.
 \def\twosecrefs#1#2{sections \ref{#1} and \ref{#2}}
 % Reference to three sections.
 \def\secrefs#1#2#3{sections \ref{#1}, \ref{#2} and \ref{#3}}
 % Reference to an equation, lower-case.
 \def\eqref#1{equation~\ref{#1}}
 % Reference to an equation, upper case
 \def\Eqref#1{Equation~\ref{#1}}
 % A raw reference to an equation---avoid using if possible
 \def\plaineqref#1{\ref{#1}}
 % Reference to a chapter, lower-case.
 \def\chapref#1{chapter~\ref{#1}}
 % Reference to a chapter, upper case.
 \def\Chapref#1{Chapter~\ref{#1}}
 % Reference to a range of chapters, lower-case.
 % The tie (~) puts a non-breaking space between the word and the first
 % number; without it the output read "chapters1--3".
 \def\rangechapref#1#2{chapters~\ref{#1}--\ref{#2}}
 % Reference to an algorithm, lower-case.
 \def\algref#1{algorithm~\ref{#1}}
 % Reference to an algorithm, upper case.
 \def\Algref#1{Algorithm~\ref{#1}}
 \def\twoalgref#1#2{algorithms \ref{#1} and \ref{#2}}
 \def\Twoalgref#1#2{Algorithms \ref{#1} and \ref{#2}}
 % Reference to a part, lower case
 \def\partref#1{part~\ref{#1}}
 % Reference to a part, upper case
 \def\Partref#1{Part~\ref{#1}}
 \def\twopartref#1#2{parts \ref{#1} and \ref{#2}}
 \def\ceil#1{\lceil #1 \rceil}
 \def\floor#1{\lfloor #1 \rfloor}
 \def\1{\bm{1}}
 \newcommand{\train}{\mathcal{D}}
 \newcommand{\valid}{\mathcal{D_{\mathrm{valid}}}}
 \newcommand{\test}{\mathcal{D_{\mathrm{test}}}}
 \def\eps{{\epsilon}}
 % Random variables
 \def\reta{{\textnormal{$\eta$}}}
 \def\ra{{\textnormal{a}}}
 \def\rb{{\textnormal{b}}}
 \def\rc{{\textnormal{c}}}
 \def\rd{{\textnormal{d}}}
 \def\re{{\textnormal{e}}}
 \def\rf{{\textnormal{f}}}
 \def\rg{{\textnormal{g}}}
 \def\rh{{\textnormal{h}}}
 \def\ri{{\textnormal{i}}}
 \def\rj{{\textnormal{j}}}
 \def\rk{{\textnormal{k}}}
 \def\rl{{\textnormal{l}}}
 % rm is already a command, just don't name any random variables m
 \def\rn{{\textnormal{n}}}
 \def\ro{{\textnormal{o}}}
 \def\rp{{\textnormal{p}}}
 \def\rq{{\textnormal{q}}}
 \def\rr{{\textnormal{r}}}
 \def\rs{{\textnormal{s}}}
 \def\rt{{\textnormal{t}}}
 \def\ru{{\textnormal{u}}}
 \def\rv{{\textnormal{v}}}
 \def\rw{{\textnormal{w}}}
 \def\rx{{\textnormal{x}}}
 \def\ry{{\textnormal{y}}}
 \def\rz{{\textnormal{z}}}
 % Random vectors
 \def\rvepsilon{{\mathbf{\epsilon}}}
 \def\rvtheta{{\mathbf{\theta}}}
 \def\rva{{\mathbf{a}}}
 \def\rvb{{\mathbf{b}}}
 \def\rvc{{\mathbf{c}}}
 \def\rvd{{\mathbf{d}}}
 \def\rve{{\mathbf{e}}}
 \def\rvf{{\mathbf{f}}}
 \def\rvg{{\mathbf{g}}}
 \def\rvh{{\mathbf{h}}}
 % Random vector i. (\rvu, the entry for u, is defined further down this list.)
 \def\rvi{{\mathbf{i}}}
 \def\rvj{{\mathbf{j}}}
 \def\rvk{{\mathbf{k}}}
 \def\rvl{{\mathbf{l}}}
 \def\rvm{{\mathbf{m}}}
 \def\rvn{{\mathbf{n}}}
 \def\rvo{{\mathbf{o}}}
 \def\rvp{{\mathbf{p}}}
 \def\rvq{{\mathbf{q}}}
 \def\rvr{{\mathbf{r}}}
 \def\rvs{{\mathbf{s}}}
 \def\rvt{{\mathbf{t}}}
 \def\rvu{{\mathbf{u}}}
 \def\rvv{{\mathbf{v}}}
 \def\rvw{{\mathbf{w}}}
 \def\rvx{{\mathbf{x}}}
 \def\rvy{{\mathbf{y}}}
 \def\rvz{{\mathbf{z}}}
 % Elements of random vectors
 \def\erva{{\textnormal{a}}}
 \def\ervb{{\textnormal{b}}}
 \def\ervc{{\textnormal{c}}}
 \def\ervd{{\textnormal{d}}}
 \def\erve{{\textnormal{e}}}
 \def\ervf{{\textnormal{f}}}
 \def\ervg{{\textnormal{g}}}
 \def\ervh{{\textnormal{h}}}
 \def\ervi{{\textnormal{i}}}
 \def\ervj{{\textnormal{j}}}
 \def\ervk{{\textnormal{k}}}
 \def\ervl{{\textnormal{l}}}
 \def\ervm{{\textnormal{m}}}
 \def\ervn{{\textnormal{n}}}
 \def\ervo{{\textnormal{o}}}
 \def\ervp{{\textnormal{p}}}
 \def\ervq{{\textnormal{q}}}
 \def\ervr{{\textnormal{r}}}
 \def\ervs{{\textnormal{s}}}
 \def\ervt{{\textnormal{t}}}
 \def\ervu{{\textnormal{u}}}
 \def\ervv{{\textnormal{v}}}
 \def\ervw{{\textnormal{w}}}
 \def\ervx{{\textnormal{x}}}
 \def\ervy{{\textnormal{y}}}
 \def\ervz{{\textnormal{z}}}
 % Random matrices
 \def\rmA{{\mathbf{A}}}
 \def\rmB{{\mathbf{B}}}
 \def\rmC{{\mathbf{C}}}
 \def\rmD{{\mathbf{D}}}
 \def\rmE{{\mathbf{E}}}
 \def\rmF{{\mathbf{F}}}
 \def\rmG{{\mathbf{G}}}
 \def\rmH{{\mathbf{H}}}
 \def\rmI{{\mathbf{I}}}
 \def\rmJ{{\mathbf{J}}}
 \def\rmK{{\mathbf{K}}}
 \def\rmL{{\mathbf{L}}}
 \def\rmM{{\mathbf{M}}}
 \def\rmN{{\mathbf{N}}}
 \def\rmO{{\mathbf{O}}}
 \def\rmP{{\mathbf{P}}}
 \def\rmQ{{\mathbf{Q}}}
 \def\rmR{{\mathbf{R}}}
 \def\rmS{{\mathbf{S}}}
 \def\rmT{{\mathbf{T}}}
 \def\rmU{{\mathbf{U}}}
 \def\rmV{{\mathbf{V}}}
 \def\rmW{{\mathbf{W}}}
 \def\rmX{{\mathbf{X}}}
 \def\rmY{{\mathbf{Y}}}
 \def\rmZ{{\mathbf{Z}}}
 % Elements of random matrices
 \def\ermA{{\textnormal{A}}}
 \def\ermB{{\textnormal{B}}}
 \def\ermC{{\textnormal{C}}}
 \def\ermD{{\textnormal{D}}}
 \def\ermE{{\textnormal{E}}}
 \def\ermF{{\textnormal{F}}}
 \def\ermG{{\textnormal{G}}}
 \def\ermH{{\textnormal{H}}}
 \def\ermI{{\textnormal{I}}}
 \def\ermJ{{\textnormal{J}}}
 \def\ermK{{\textnormal{K}}}
 \def\ermL{{\textnormal{L}}}
 \def\ermM{{\textnormal{M}}}
 \def\ermN{{\textnormal{N}}}
 \def\ermO{{\textnormal{O}}}
 \def\ermP{{\textnormal{P}}}
 \def\ermQ{{\textnormal{Q}}}
 \def\ermR{{\textnormal{R}}}
 \def\ermS{{\textnormal{S}}}
 \def\ermT{{\textnormal{T}}}
 \def\ermU{{\textnormal{U}}}
 \def\ermV{{\textnormal{V}}}
 \def\ermW{{\textnormal{W}}}
 \def\ermX{{\textnormal{X}}}
 \def\ermY{{\textnormal{Y}}}
 \def\ermZ{{\textnormal{Z}}}
 % Vectors
 \def\vzero{{\bm{0}}}
 \def\vone{{\bm{1}}}
 \def\vmu{{\bm{\mu}}}
 \def\vtheta{{\bm{\theta}}}
 \def\va{{\bm{a}}}
 \def\vb{{\bm{b}}}
 \def\vc{{\bm{c}}}
 \def\vd{{\bm{d}}}
 \def\ve{{\bm{e}}}
 \def\vf{{\bm{f}}}
 \def\vg{{\bm{g}}}
 \def\vh{{\bm{h}}}
 \def\vi{{\bm{i}}}
 \def\vj{{\bm{j}}}
 \def\vk{{\bm{k}}}
 \def\vl{{\bm{l}}}
 \def\vm{{\bm{m}}}
 \def\vn{{\bm{n}}}
 \def\vo{{\bm{o}}}
 \def\vp{{\bm{p}}}
 \def\vq{{\bm{q}}}
 \def\vr{{\bm{r}}}
 \def\vs{{\bm{s}}}
 \def\vt{{\bm{t}}}
 \def\vu{{\bm{u}}}
 \def\vv{{\bm{v}}}
 \def\vw{{\bm{w}}}
 \def\vx{{\bm{x}}}
 \def\vy{{\bm{y}}}
 \def\vz{{\bm{z}}}
 % Elements of vectors
 \def\evalpha{{\alpha}}
 \def\evbeta{{\beta}}
 \def\evepsilon{{\epsilon}}
 \def\evlambda{{\lambda}}
 \def\evomega{{\omega}}
 \def\evmu{{\mu}}
 \def\evpsi{{\psi}}
 \def\evsigma{{\sigma}}
 \def\evtheta{{\theta}}
 \def\eva{{a}}
 \def\evb{{b}}
 \def\evc{{c}}
 \def\evd{{d}}
 \def\eve{{e}}
 \def\evf{{f}}
 \def\evg{{g}}
 \def\evh{{h}}
 \def\evi{{i}}
 \def\evj{{j}}
 \def\evk{{k}}
 \def\evl{{l}}
 \def\evm{{m}}
 \def\evn{{n}}
 \def\evo{{o}}
 \def\evp{{p}}
 \def\evq{{q}}
 \def\evr{{r}}
 \def\evs{{s}}
 \def\evt{{t}}
 \def\evu{{u}}
 \def\evv{{v}}
 \def\evw{{w}}
 \def\evx{{x}}
 \def\evy{{y}}
 \def\evz{{z}}
 % Matrix
 \def\mA{{\bm{A}}}
 \def\mB{{\bm{B}}}
 \def\mC{{\bm{C}}}
 \def\mD{{\bm{D}}}
 \def\mE{{\bm{E}}}
 \def\mF{{\bm{F}}}
 \def\mG{{\bm{G}}}
 \def\mH{{\bm{H}}}
 \def\mI{{\bm{I}}}
 \def\mJ{{\bm{J}}}
 \def\mK{{\bm{K}}}
 \def\mL{{\bm{L}}}
 \def\mM{{\bm{M}}}
 \def\mN{{\bm{N}}}
 \def\mO{{\bm{O}}}
 \def\mP{{\bm{P}}}
 \def\mQ{{\bm{Q}}}
 \def\mR{{\bm{R}}}
 \def\mS{{\bm{S}}}
 \def\mT{{\bm{T}}}
 \def\mU{{\bm{U}}}
 \def\mV{{\bm{V}}}
 \def\mW{{\bm{W}}}
 \def\mX{{\bm{X}}}
 \def\mY{{\bm{Y}}}
 \def\mZ{{\bm{Z}}}
 \def\mBeta{{\bm{\beta}}}
 \def\mPhi{{\bm{\Phi}}}
 \def\mLambda{{\bm{\Lambda}}}
 \def\mSigma{{\bm{\Sigma}}}
 % Tensor
 % \mathsfit: math alphabet in the default encoding and sans-serif family;
 % medium series with slanted shape ...
 \DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl}
 % ... and, in the "bold" math version, bx series with upright (n) shape.
 \SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n}
 % \tens{X}: typeset X in \mathsfit, wrapped in \bm.
 \newcommand{\tens}[1]{\bm{\mathsfit{#1}}}
 \def\tA{{\tens{A}}}
 \def\tB{{\tens{B}}}
 \def\tC{{\tens{C}}}
 \def\tD{{\tens{D}}}
 \def\tE{{\tens{E}}}
 \def\tF{{\tens{F}}}
 \def\tG{{\tens{G}}}
 \def\tH{{\tens{H}}}
 \def\tI{{\tens{I}}}
 \def\tJ{{\tens{J}}}
 \def\tK{{\tens{K}}}
 \def\tL{{\tens{L}}}
 \def\tM{{\tens{M}}}
 \def\tN{{\tens{N}}}
 \def\tO{{\tens{O}}}
 \def\tP{{\tens{P}}}
 \def\tQ{{\tens{Q}}}
 \def\tR{{\tens{R}}}
 \def\tS{{\tens{S}}}
 \def\tT{{\tens{T}}}
 \def\tU{{\tens{U}}}
 \def\tV{{\tens{V}}}
 \def\tW{{\tens{W}}}
 \def\tX{{\tens{X}}}
 \def\tY{{\tens{Y}}}
 \def\tZ{{\tens{Z}}}
 % Graph
 \def\gA{{\mathcal{A}}}
 \def\gB{{\mathcal{B}}}
 \def\gC{{\mathcal{C}}}
 \def\gD{{\mathcal{D}}}
 \def\gE{{\mathcal{E}}}
 \def\gF{{\mathcal{F}}}
 \def\gG{{\mathcal{G}}}
 \def\gH{{\mathcal{H}}}
 \def\gI{{\mathcal{I}}}
 \def\gJ{{\mathcal{J}}}
 \def\gK{{\mathcal{K}}}
 \def\gL{{\mathcal{L}}}
 \def\gM{{\mathcal{M}}}
 \def\gN{{\mathcal{N}}}
 \def\gO{{\mathcal{O}}}
 \def\gP{{\mathcal{P}}}
 \def\gQ{{\mathcal{Q}}}
 \def\gR{{\mathcal{R}}}
 \def\gS{{\mathcal{S}}}
 \def\gT{{\mathcal{T}}}
 \def\gU{{\mathcal{U}}}
 \def\gV{{\mathcal{V}}}
 \def\gW{{\mathcal{W}}}
 \def\gX{{\mathcal{X}}}
 \def\gY{{\mathcal{Y}}}
 \def\gZ{{\mathcal{Z}}}
 % Sets
 \def\sA{{\mathbb{A}}}
 \def\sB{{\mathbb{B}}}
 \def\sC{{\mathbb{C}}}
 \def\sD{{\mathbb{D}}}
 % Don't use a set called E, because this would be the same as our symbol
 % for expectation.
 \def\sF{{\mathbb{F}}}
 \def\sG{{\mathbb{G}}}
 \def\sH{{\mathbb{H}}}
 \def\sI{{\mathbb{I}}}
 \def\sJ{{\mathbb{J}}}
 \def\sK{{\mathbb{K}}}
 \def\sL{{\mathbb{L}}}
 \def\sM{{\mathbb{M}}}
 \def\sN{{\mathbb{N}}}
 \def\sO{{\mathbb{O}}}
 \def\sP{{\mathbb{P}}}
 \def\sQ{{\mathbb{Q}}}
 \def\sR{{\mathbb{R}}}
 \def\sS{{\mathbb{S}}}
 \def\sT{{\mathbb{T}}}
 \def\sU{{\mathbb{U}}}
 \def\sV{{\mathbb{V}}}
 \def\sW{{\mathbb{W}}}
 \def\sX{{\mathbb{X}}}
 \def\sY{{\mathbb{Y}}}
 \def\sZ{{\mathbb{Z}}}
 % Entries of a matrix
 \def\emLambda{{\Lambda}}
 \def\emA{{A}}
 \def\emB{{B}}
 \def\emC{{C}}
 \def\emD{{D}}
 \def\emE{{E}}
 \def\emF{{F}}
 \def\emG{{G}}
 \def\emH{{H}}
 \def\emI{{I}}
 \def\emJ{{J}}
 \def\emK{{K}}
 \def\emL{{L}}
 \def\emM{{M}}
 \def\emN{{N}}
 \def\emO{{O}}
 \def\emP{{P}}
 \def\emQ{{Q}}
 \def\emR{{R}}
 \def\emS{{S}}
 \def\emT{{T}}
 \def\emU{{U}}
 \def\emV{{V}}
 \def\emW{{W}}
 \def\emX{{X}}
 \def\emY{{Y}}
 \def\emZ{{Z}}
 \def\emSigma{{\Sigma}}
 % entries of a tensor
 % Same font as tensor, without \bm wrapper
 \newcommand{\etens}[1]{\mathsfit{#1}}
 \def\etLambda{{\etens{\Lambda}}}
 \def\etA{{\etens{A}}}
 \def\etB{{\etens{B}}}
 \def\etC{{\etens{C}}}
 \def\etD{{\etens{D}}}
 \def\etE{{\etens{E}}}
 \def\etF{{\etens{F}}}
 \def\etG{{\etens{G}}}
 \def\etH{{\etens{H}}}
 \def\etI{{\etens{I}}}
 \def\etJ{{\etens{J}}}
 \def\etK{{\etens{K}}}
 \def\etL{{\etens{L}}}
 \def\etM{{\etens{M}}}
 \def\etN{{\etens{N}}}
 \def\etO{{\etens{O}}}
 \def\etP{{\etens{P}}}
 \def\etQ{{\etens{Q}}}
 \def\etR{{\etens{R}}}
 \def\etS{{\etens{S}}}
 \def\etT{{\etens{T}}}
 \def\etU{{\etens{U}}}
 \def\etV{{\etens{V}}}
 \def\etW{{\etens{W}}}
 \def\etX{{\etens{X}}}
 \def\etY{{\etens{Y}}}
 \def\etZ{{\etens{Z}}}
 % The true underlying data generating distribution
 \newcommand{\pdata}{p_{\rm{data}}}
 % The empirical distribution defined by the training set
 \newcommand{\ptrain}{\hat{p}_{\rm{data}}}
 \newcommand{\Ptrain}{\hat{P}_{\rm{data}}}
 % The model distribution
 \newcommand{\pmodel}{p_{\rm{model}}}
 \newcommand{\Pmodel}{P_{\rm{model}}}
 \newcommand{\ptildemodel}{\tilde{p}_{\rm{model}}}
 % Stochastic autoencoder distributions
 \newcommand{\pencode}{p_{\rm{encoder}}}
 \newcommand{\pdecode}{p_{\rm{decoder}}}
 \newcommand{\precons}{p_{\rm{reconstruct}}}
 \newcommand{\laplace}{\mathrm{Laplace}} % Laplace distribution
 \newcommand{\E}{\mathbb{E}}
 \newcommand{\Ls}{\mathcal{L}}
 \newcommand{\R}{\mathbb{R}}
 \newcommand{\emp}{\tilde{p}}
 \newcommand{\lr}{\alpha}
 \newcommand{\reg}{\lambda}
 \newcommand{\rect}{\mathrm{rectifier}}
 \newcommand{\softmax}{\mathrm{softmax}}
 \newcommand{\sigmoid}{\sigma}
 \newcommand{\softplus}{\zeta}
 \newcommand{\KL}{D_{\mathrm{KL}}}
 \newcommand{\Var}{\mathrm{Var}}
 \newcommand{\standarderror}{\mathrm{SE}}
 \newcommand{\Cov}{\mathrm{Cov}}
 % Wolfram Mathworld says $L^2$ is for function spaces and $\ell^2$ is for vectors
 % But then they seem to use $L^2$ for vectors throughout the site, and so does
 % wikipedia.
 \newcommand{\normlzero}{L^0}
 \newcommand{\normlone}{L^1}
 \newcommand{\normltwo}{L^2}
 \newcommand{\normlp}{L^p}
 \newcommand{\normmax}{L^\infty}
 \newcommand{\parents}{Pa} % See usage in notation.tex. Chosen to match Daphne's book.
 \DeclareMathOperator*{\argmax}{arg\,max}
 \DeclareMathOperator*{\argmin}{arg\,min}
 \DeclareMathOperator{\sign}{sign}
 \DeclareMathOperator{\Tr}{Tr}
 \let\ab\allowbreak
--- a/skills/mlops/ml-paper-writing/templates/colm2025/natbib.sty
+++ b/skills/mlops/ml-paper-writing/templates/colm2025/natbib.sty
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/fancyhdr.sty
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/fancyhdr.sty
@ -0,0 +1,485 @@
 % fancyhdr.sty version 3.2
 % Fancy headers and footers for LaTeX.
 % Piet van Oostrum, 
 % Dept of Computer and Information Sciences, University of Utrecht,
 % Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
 % Telephone: +31 30 2532180. Email: piet@cs.uu.nl
 % ========================================================================
 % LICENCE:
 % This file may be distributed under the terms of the LaTeX Project Public
 % License, as described in lppl.txt in the base LaTeX distribution.
 % Either version 1 or, at your option, any later version.
 % ========================================================================
 % MODIFICATION HISTORY:
 % Sep 16, 1994
 % version 1.4: Correction for use with \reversemargin
 % Sep 29, 1994:
 % version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
 % Oct 4, 1994:
 % version 1.6: Reset single spacing in headers/footers for use with
 % setspace.sty or doublespace.sty
 % Oct 4, 1994:
 % version 1.7: changed \let\@mkboth\markboth to
 % \def\@mkboth{\protect\markboth} to make it more robust
 % Dec 5, 1994:
 % version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
 % importantly) use the \chapter/sectionmark definitions from ps@headings if
 % they exist (which should be true for all standard classes).
 % May 31, 1995:
 % version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
 % construction in the doc did not work properly with the fancyplain style. 
 % June 1, 1995:
 % version 1.91: The definition of \@mkboth wasn't restored on subsequent
 % \pagestyle{fancy}'s.
 % June 1, 1995:
 % version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
 % \pagestyle{fancy} would erroneously select the plain version.
 % June 1, 1995:
 % version 1.93: \fancypagestyle command added.
 % Dec 11, 1995:
 % version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
 % CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
 % position (old hardcoded value of .3\normalbaselineskip is far too high
 % when used with very small footer fonts).
 % Jan 31, 1996:
 % version 1.95: call \@normalsize in the reset code if that is defined,
 % otherwise \normalsize.
 % this is to solve a problem with ucthesis.cls, as this doesn't
 % define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
 % work as this is optimized to do very little, so there \@normalsize should
 % be called. Hopefully this code works for all versions of LaTeX known to
 % mankind.  
 % April 25, 1996:
 % version 1.96: initialize \headwidth to a magic (negative) value to catch
 % most common cases that people change it before calling \pagestyle{fancy}.
 % Note it can't be initialized when reading in this file, because
 % \textwidth could be changed afterwards. This is quite probable.
 % We also switch to \MakeUppercase rather than \uppercase and introduce a
 % \nouppercase command for use in headers and footers.
 % May 3, 1996:
 % version 1.97: Two changes:
 % 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
 % for the chapter and section marks). The current version of amsbook and
 % amsart classes don't seem to need them anymore. Moreover the standard
 % latex classes don't use \markboth if twoside isn't selected, and this is
 % confusing as \leftmark doesn't work as expected.
 % 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
 % in the amsbook and amsart classes, that make global changes to \topskip,
 % which are reset in \ps@empty. Hopefully this doesn't break other things.
 % May 7, 1996:
 % version 1.98:
 % Added % after the line  \def\nouppercase
 % May 7, 1996:
 % version 1.99: This is the alpha version of fancyhdr 2.0
 % Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
 % Changed \headrulewidth, \footrulewidth, \footruleskip to
 % macros rather than length parameters, In this way they can be
 % conditionalized and they don't consume length registers. There is no need
 % to have them as length registers unless you want to do calculations with
 % them, which is unlikely. Note that this may make some uses of them
 % incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
 % May 10, 1996:
 % version 1.99a:
 % Added a few more % signs
 % May 10, 1996:
 % version 1.99b:
 % Changed the syntax of \f@nfor to be resistant to catcode changes of :=
 % Removed the [1] from the defs of \lhead etc. because the parameter is
 % consumed by the \@[xy]lhead etc. macros.
 % June 24, 1997:
 % version 1.99c:
 % corrected \nouppercase to also include the protected form of \MakeUppercase
 % \global added to manipulation of \headwidth.
 % \iffootnote command added.
 % Some comments added about \@fancyhead and \@fancyfoot.
 % Aug 24, 1998
 % version 1.99d
 % Changed the default \ps@empty to \ps@@empty in order to allow
 % \fancypagestyle{empty} redefinition.
 % Oct 11, 2000
 % version 2.0
 % Added LPPL license clause.
 %
 % A check for \headheight is added. An errormessage is given (once) if the
 % header is too large. Empty headers don't generate the error even if
 % \headheight is very small or even 0pt. 
 % Warning added for the use of 'E' option when twoside option is not used.
 % In this case the 'E' fields will never be used.
 %
 % Mar 10, 2002
 % version 2.1beta
 % New command: \fancyhfoffset[place]{length}
 % defines offsets to be applied to the header/footer to let it stick into
 % the margins (if length > 0).
 % place is like in fancyhead, except that only E,O,L,R can be used.
 % This replaces the old calculation based on \headwidth and the marginpar
 % area.
 % \headwidth will be dynamically calculated in the headers/footers when
 % this is used.
 %
 % Mar 26, 2002
 % version 2.1beta2
 % \fancyhfoffset now also takes h,f as possible letters in the argument to
 % allow the header and footer widths to be different.
 % New commands \fancyheadoffset and \fancyfootoffset added comparable to
 % \fancyhead and \fancyfoot.
 % Errormessages and warnings have been made more informative.
 %
 % Dec 9, 2002
 % version 2.1
 % The defaults for \footrulewidth, \plainheadrulewidth and
 % \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
 % someone inadvertently uses \setlength to change any of these, the value
 % of \z@skip will not be changed, rather an errormessage will be given.
 % March 3, 2004
 % Release of version 3.0
 % Oct 7, 2004
 % version 3.1
 % Added '\endlinechar=13' to \fancy@reset to prevent problems with
 % includegraphics in header when verbatiminput is active.
 % March 22, 2005
 % version 3.2
 % reset \everypar (the real one) in \fancy@reset because spanish.ldf does
 % strange things with \everypar between << and >>.
 % \ifancy@mpty{<text>}: stores <text> in \temp@a and leaves an open \ifx that
 % compares it with \@empty; the caller supplies the branches and the \fi.
 \def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
 % \fancy@def<cs>{<text>}: define <cs> (with the \fancy@gbl prefix) as
 % \leavevmode when <text> is empty, otherwise as <text>\strut.
 \def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
                                   \fancy@gbl\def#1{#2\strut}\fi}
 % Prefix used by \fancy@def; \global by default.
 \let\fancy@gbl\global
 % \@fancyerrmsg{<msg>}: report an error through \PackageError (package name
 % Fancyhdr, empty help text), or through \errmessage when \PackageError is
 % undefined.
 \def\@fancyerrmsg#1{%
        \ifx\PackageError\undefined
        \errmessage{#1}\else
        \PackageError{Fancyhdr}{#1}{}\fi}
 % \@fancywarning{<msg>}: report a warning through \PackageWarning.
 % NOTE: when \PackageWarning is undefined the fallback is \errmessage, so the
 % warning is raised as an error in that case.
 \def\@fancywarning#1{%
        \ifx\PackageWarning\undefined
        \errmessage{#1}\else
        \PackageWarning{Fancyhdr}{#1}{}\fi}
 % Usage: \@forc \var{charstring}{command to be executed for each char}
 % This is similar to LaTeX's \@tfor, but expands the charstring.
 \def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
 \def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
                                    \f@@rc#1#2\f@@rc{#3}\fi}
 \def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
 % Usage: \f@nfor\name:=list\do{body}
 % Like LaTeX's \@for but an empty list is treated as a list with an empty
 % element
 \newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
    \expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
 % Usage: \def@ult \cs{defaults}{argument}
 % sets \cs to the characters from defaults appearing in argument
 % or defaults if it would be empty. All characters are lowercased.
 \newcommand\def@ult[3]{%
    \edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
    \def#1{}%
    \@forc\tmpf@ra{#2}%
        {\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
    \ifx\@empty#1\def#1{#2}\fi}
 %
 % \if@in <char><set><truecase><falsecase>
 % Executes <truecase> if <char> occurs in <set>, <falsecase> otherwise.
 % Method: \temp@a holds the expanded <set>; \temp@b is redefined to grab the
 % part of <set><char> that precedes the first <char>. If that prefix equals
 % the whole <set>, <char> did not occur in it.
 %
 \newcommand{\if@in}[4]{%
    \edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
    \expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
 \newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
                                     {\f@ncyhf\fancyhead h[]}}
 \newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
                                     {\f@ncyhf\fancyfoot f[]}}
 \newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
                                   {\f@ncyhf\fancyhf{}[]}}
 % New commands for offsets added
 \newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
                                           {\f@ncyhfoffs\fancyheadoffset h[]}}
 \newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
                                           {\f@ncyhfoffs\fancyfootoffset f[]}}
 \newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
                                         {\f@ncyhfoffs\fancyhfoffset{}[]}}
 % The header and footer fields are stored in command sequences with
 % names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
 % and <z> from [hf].
 % \f@ncyhf<cmd><hf>[<places>]{<text>}: worker behind \fancyhead, \fancyfoot
 % and \fancyhf. <cmd> is the calling command (used only in messages), <hf>
 % is `h', `f' or empty, <places> is the selector list.
 % 1. Every character of <places> that is not in eolcrhf,EOLCRHF is collected
 %    in \temp@c; a non-empty result raises an error.
 % 2. For each comma-separated item of <places>, \def@ult extracts the e/o,
 %    l/c/r and h/f letters (h/f taken from <hf> plus the item). A warning is
 %    issued when \if@twoside is false and the e/o selection is just `e'.
 % 3. For every resulting combination the field macro \f@ncy<eo><lcr><hf> is
 %    defined with <text> via \fancy@def.
 \def\f@ncyhf#1#2[#3]#4{%
    \def\temp@c{}%
    \@forc\tmpf@ra{#3}%
        {\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
            {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
    \ifx\@empty\temp@c\else
        \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
          [#3]}%
    \fi
    \f@nfor\temp@c{#3}%
        {\def@ult\f@@@eo{eo}\temp@c
         \if@twoside\else
           \if\f@@@eo e\@fancywarning
             {\string#1's `E' option without twoside option is useless}\fi\fi
         \def@ult\f@@@lcr{lcr}\temp@c
         \def@ult\f@@@hf{hf}{#2\temp@c}%
         \@forc\f@@eo\f@@@eo
             {\@forc\f@@lcr\f@@@lcr
                 {\@forc\f@@hf\f@@@hf
                     {\expandafter\fancy@def\csname
                      f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
                      {#4}}}}}}
 % \f@ncyhfoffs<cmd><hf>[<places>]{<length>}: worker behind \fancyheadoffset,
 % \fancyfootoffset and \fancyhfoffset. Same parsing as \f@ncyhf, except that
 % the accepted characters are eolrhf,EOLRHF (no `c') and that each selected
 % combination sets the length register \f@ncyO@<eo><lr><hf> to <length> with
 % \setlength. Finally \fancy@setoffs is called.
 \def\f@ncyhfoffs#1#2[#3]#4{%
    \def\temp@c{}%
    \@forc\tmpf@ra{#3}%
        {\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
            {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
    \ifx\@empty\temp@c\else
        \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
          [#3]}%
    \fi
    \f@nfor\temp@c{#3}%
        {\def@ult\f@@@eo{eo}\temp@c
         \if@twoside\else
           \if\f@@@eo e\@fancywarning
             {\string#1's `E' option without twoside option is useless}\fi\fi
         \def@ult\f@@@lcr{lr}\temp@c
         \def@ult\f@@@hf{hf}{#2\temp@c}%
         \@forc\f@@eo\f@@@eo
             {\@forc\f@@lcr\f@@@lcr
                 {\@forc\f@@hf\f@@@hf
                     {\expandafter\setlength\csname
                      f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
                      {#4}}}}}%
     \fancy@setoffs}
 % Fancyheadings version 1 commands. These are more or less deprecated,
 % but they continue to work.
 % Each command has two forms, chosen by whether a `[' follows:
 %   \xhead[<even>]{<odd>}  sets the e-field to <even> and the o-field to <odd>;
 %   \xhead{<text>}         sets both fields to <text>.
 % All fields are stored through \fancy@def.
 % Left header (\f@ncyelh, \f@ncyolh):
 \newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
 \def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
 \def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
 % Centre header (\f@ncyech, \f@ncyoch):
 \newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
 \def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
 \def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
 % Right header (\f@ncyerh, \f@ncyorh):
 \newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
 \def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
 \def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
 % Left footer (\f@ncyelf, \f@ncyolf):
 \newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
 \def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
 \def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
 % Centre footer (\f@ncyecf, \f@ncyocf):
 \newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
 \def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
 \def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
 % Right footer (\f@ncyerf, \f@ncyorf):
 \newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
 \def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
 \def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
 \newlength{\fancy@headwidth}
 \let\headwidth\fancy@headwidth
 \newlength{\f@ncyO@elh}
 \newlength{\f@ncyO@erh}
 \newlength{\f@ncyO@olh}
 \newlength{\f@ncyO@orh}
 \newlength{\f@ncyO@elf}
 \newlength{\f@ncyO@erf}
 \newlength{\f@ncyO@olf}
 \newlength{\f@ncyO@orf}
 \newcommand{\headrulewidth}{0.4pt}
 \newcommand{\footrulewidth}{0pt}
 \newcommand{\footruleskip}{.3\normalbaselineskip}
 % Fancyplain stuff shouldn't be used anymore (rather
 % \fancypagestyle{plain} should be used), but it must be present for
 % compatibility reasons.
 \newcommand{\plainheadrulewidth}{0pt}
 \newcommand{\plainfootrulewidth}{0pt}
 \newif\if@fancyplain \@fancyplainfalse
 \def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
 \headwidth=-123456789sp %magic constant
 % Command to reset various things in the headers:
 % a.o.  single spacing (taken from setspace.sty)
 % and the catcode of ^^M (so that epsf files in the header work if a
 % verbatim crosses a page boundary)
 % It also defines a \nouppercase command that disables \uppercase and
 % \MakeUppercase. It can only be used in the headers and footers.
 \let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
 \def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
 \def\baselinestretch{1}%
 \def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
     \expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
 \ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
   \ifx\@normalsize\undefined \normalsize % for ucthesis.cls
   \else \@normalsize \fi
 \else% NFSS (2.09) present
  \@newbaseline%
 \fi}
 % Initialization of the head and foot text.
 % The default values still contain \fancyplain for compatibility.
 \fancyhf{} % clear all
 % lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
 % righthead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
 \if@twoside
  \fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
  \fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
 \else
  \fancyhead[l]{\fancyplain{}{\sl\rightmark}}
  \fancyhead[r]{\fancyplain{}{\sl\leftmark}}
 \fi
 \fancyfoot[c]{\rm\thepage} % page number
 % Use box 0 as a temp box and dimen 0 as temp dimen. 
 % This can be done, because this code will always
 % be used inside another box, and therefore the changes are local.
 % \@fancyvbox<len>{<material>}: set <material> in a \vbox (box 0). If the box
 % is taller than <len>, issue a warning, save the old <len> in \dimen0, apply
 % \global\setlength to make <len> equal to the box height, and reset the box
 % height to the old <len>. Finally output box 0.
 \def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
  {\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
    We now make it that large for the rest of the document.^^J
    This may cause the page layout to be inconsistent, however\@gobble}%
  \dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
  \box0}
 % Put together a header or footer given the left, center and
 % right text, fillers at left and right and a rule.
 % The \lap commands put the text into an hbox of zero size,
 % so overlapping text does not generate an errormessage.
 % These macros have 5 parameters:
 % 1. LEFTSIDE BEARING % This determines at which side the header will stick
 %    out. When \fancyhfoffset is used this calculates \headwidth, otherwise
 %    it is \hss or \relax (after expansion).
 % 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
 % 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
 % 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
 % 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
 \def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
  \@fancyvbox\headheight{\hbox
    {\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
      \parbox[b]{\headwidth}{\centering#3}\hfill
      \llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
 \def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
    \@fancyvbox\footskip{\footrule
      \hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
        \parbox[t]{\headwidth}{\centering#3}\hfill
        \llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
 % \headrule: rule of height \headrulewidth and width \headwidth, followed by
 % a negative skip of the same height. Under \if@fancyplain the width macro is
 % replaced by \plainheadrulewidth; the inner braces keep that \let local.
 \def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
    \hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
 % \footrule: skips back by \footruleskip and \footrulewidth, draws a rule of
 % width \headwidth and height \footrulewidth, then skips \footruleskip.
 % Under \if@fancyplain it uses \plainfootrulewidth (again locally).
 \def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
    \vskip-\footruleskip\vskip-\footrulewidth
    \hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
 % \ps@fancy: the `fancy' page style, as executed on first use. It
 %  - defines \@chapapp and \MakeUppercase when they are undefined;
 %  - installs mark commands: \sectionmark/\subsectionmark when \chapter is
 %    undefined, \chaptermark/\sectionmark otherwise;
 %  - activates the header/footer layout through \ps@@fancy;
 %  - globally redefines itself to the short form
 %    \@fancyplainfalse\ps@@fancy for later calls;
 %  - if \headwidth is still negative, adds back the magic constant and adds
 %    \textwidth (both globally).
 \def\ps@fancy{%
 \@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
 %
 % Define \MakeUppercase for old LaTeXen.
 % Note: we used \def rather than \let, so that \let\uppercase\relax (from
 % the version 1 documentation) will still work.
 %
 \@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
 \@ifundefined{chapter}{\def\sectionmark##1{\markboth
 {\MakeUppercase{\ifnum \c@secnumdepth>\z@
 \thesection\hskip 1em\relax \fi ##1}}{}}%
 \def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
 \thesubsection\hskip 1em\relax \fi ##1}}}%
 {\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
 \@chapapp\ \thechapter. \ \fi ##1}}{}}%
 \def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
 \thesection. \ \fi ##1}}}}%
 %\csname ps@headings\endcsname % use \ps@headings defaults if they exist
 \ps@@fancy
 \gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
 % Initialize \headwidth if the user didn't
 %
 \ifdim\headwidth<0sp
 %
 % This catches the case that \headwidth hasn't been initialized and the
 % case that the user added something to \headwidth in the expectation that
 % it was initialized to \textwidth. We compensate this now. This loses if
 % the user intended to multiply it by a factor. But that case is more
 % likely done by saying something like \headwidth=1.2\textwidth. 
 % The doc says you have to change \headwidth after the first call to
 % \pagestyle{fancy}. This code is just to catch the most common cases where
 % that requirement is violated.
 %
    \global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
 \fi}
 % \ps@fancyplain: run \ps@fancy, then make \ps@plain an alias for
 % \ps@plain@fancy.
 \def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
 % \ps@plain@fancy: set \if@fancyplain to true and activate the fancy layout.
 \def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
 % Keep the \ps@empty that is current at load time as \ps@@empty.
 \let\ps@@empty\ps@empty
 % \ps@@fancy: install the layout. Runs \ps@@empty, defines \@mkboth as
 % \protect\markboth, and defines \@oddhead, \@oddfoot, \@evenhead and
 % \@evenfoot as calls to \@fancyhead / \@fancyfoot with the matching
 % bearings (\fancy@O...) and field macros (\f@ncy...).
 \def\ps@@fancy{%
 \ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
 \def\@mkboth{\protect\markboth}%
 \def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
 \def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
 \def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
 \def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
 }
 % Default definitions for compatibility mode:
 % These cause the header/footer to take the defined \headwidth as width
 % And to shift in the direction of the marginpar area
 \def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
 \def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
 \let\fancy@Oelh\fancy@Oorh
 \let\fancy@Oerh\fancy@Oolh
 \let\fancy@Oolf\fancy@Oolh
 \let\fancy@Oorf\fancy@Oorh
 \let\fancy@Oelf\fancy@Oelh
 \let\fancy@Oerf\fancy@Oerh
 % New definitions for the use of \fancyhfoffset
 % These calculate the \headwidth from \textwidth and the specified offsets.
 \def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
                   \advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
 \def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
                   \advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
 \def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
                   \advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
 \def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
                   \advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
 % \fancy@setoffs: switch the header/footer bearings to the \fancyhfoffset
 % variants. Each left bearing becomes the matching \fancy@offs... macro
 % (which sets \headwidth from \textwidth plus the two offsets and skips left
 % by the left offset); each right bearing becomes \hss.
 % Every \let is prefixed with \fancy@gbl, so the scope of the assignments is
 % whatever \fancy@gbl currently stands for.
 \def\fancy@setoffs{%
 % Just in case \let\headwidth\textwidth was used
  \fancy@gbl\let\headwidth\fancy@headwidth
  \fancy@gbl\let\fancy@Oolh\fancy@offsolh
  \fancy@gbl\let\fancy@Oelh\fancy@offselh
  \fancy@gbl\let\fancy@Oorh\hss
  \fancy@gbl\let\fancy@Oerh\hss
  \fancy@gbl\let\fancy@Oolf\fancy@offsolf
  \fancy@gbl\let\fancy@Oelf\fancy@offself
  \fancy@gbl\let\fancy@Oorf\hss
  \fancy@gbl\let\fancy@Oerf\hss}
 % \iffootnote: true when the column being built carries footnote material.
 \newif\iffootnote
 % Wrap LaTeX's \@makecol: before the original (saved as \latex@makecol) runs,
 % record the footnote flag and copy the top/bottom float lists so headers and
 % footers can test them.
 \let\latex@makecol\@makecol
 % \footins is void when no footnotes were inserted, so that case must clear
 % the flag and the non-void case must set it (the two were swapped before,
 % which inverted \iffootnote).
 \def\@makecol{\ifvoid\footins\footnotefalse\else\footnotetrue\fi
 \let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
 % \iftopfloat{A}{B}: A if \topfloat (the copy of \@toplist) is non-empty, else B.
 \def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
 % \ifbotfloat{A}{B}: A if \botfloat (the copy of \@botlist) is non-empty, else B.
 \def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
 % \iffloatpage{A}{B}: A when \if@fcolmade is true, else B.
 \def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
 % \fancypagestyle{<name>}{<setup>}: define the page style <name> as the macro
 % \ps@<name>. When that style is selected it first makes \fancy@gbl a no-op
 % (\let\fancy@gbl\relax), then runs <setup>, then calls \ps@fancy.
 \newcommand{\fancypagestyle}[2]{%
  \@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.bib
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.bib
@ -0,0 +1,24 @@
@incollection{Bengio+chapter2007,
 author = {Bengio, Yoshua and LeCun, Yann},
 booktitle = {Large Scale Kernel Machines},
 publisher = {MIT Press},
 title = {Scaling Learning Algorithms Towards {AI}},
 year = {2007}
 }
@article{Hinton06,
 author = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee Whye},
 journal = {Neural Computation},
 pages = {1527--1554},
 title = {A Fast Learning Algorithm for Deep Belief Nets},
 volume = {18},
 year = {2006}
 }
@book{goodfellow2016deep,
 title={Deep learning},
 author={Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
 volume={1},
 year={2016},
 publisher={MIT Press}
 }
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.bst
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.bst
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.pdf
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.pdf
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.sty
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.sty
@ -0,0 +1,246 @@
 %%%% ICLR Macros (LaTex)
 %%%% Adapted by Hugo Larochelle from the NIPS stylefile Macros
 %%%% Style File
 %%%% Dec 12, 1990   Rev Aug 14, 1991; Sept, 1995; April, 1997; April, 1999; October 2014
 % This file can be used with Latex2e whether running in main mode, or
 % 2.09 compatibility mode.
 %
 % If using main mode, you need to include the commands
 %             \documentclass{article}
 %             \usepackage{iclr2026_conference,times}
 %
 % Change the overall width of the page.  If these parameters are
 %       changed, they will require corresponding changes in the
 %       maketitle section.
 %
 % Package dependencies of this style file.  \RequirePackage is the
 % package-writer counterpart of \usepackage, so it is used for every one.
 \RequirePackage{eso-pic} % used by \AddToShipoutPicture
 \RequirePackage{fancyhdr}
 \RequirePackage{natbib}
 % modification to natbib citations
 \setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
 \renewcommand{\topfraction}{0.95}   % let figure take up nearly whole page
 \renewcommand{\textfraction}{0.05}  % let figure take up nearly whole page
 % Define iclrfinal, set to true if iclrfinalcopy is defined
 \newif\ificlrfinal
 \iclrfinalfalse
 \def\iclrfinalcopy{\iclrfinaltrue}
 \font\iclrtenhv  = phvb at 8pt
 % Specify the dimensions of each page
 \setlength{\paperheight}{11in}
 \setlength{\paperwidth}{8.5in}
 \oddsidemargin .5in    %   Note \oddsidemargin = \evensidemargin
 \evensidemargin .5in
 \marginparwidth 0.07 true in
 %\marginparwidth 0.75 true in
 %\topmargin 0 true pt           % Nominal distance from top of page to top of
 %\topmargin 0.125in
 \topmargin -0.625in
 \addtolength{\headsep}{0.25in}
 \textheight 9.0 true in       % Height of text (including footnotes & figures)
 \textwidth 5.5 true in        % Width of text line.
 \widowpenalty=10000
 \clubpenalty=10000
 % \thispagestyle{empty}        \pagestyle{empty}
 \flushbottom \sloppy
 % We're never going to need a table of contents, so just flush it to
 % save space --- suggested by drstrip@sandia-2
 \def\addcontentsline#1#2#3{}
 % Title stuff, taken from deproc.
 % \maketitle: typeset the title block once, then disable itself.
 % Inside a group the footnote marks are switched to symbols and the layout
 % of footnote mark and text is adjusted; \@maketitle produces the title
 % block and \@thanks is expanded right after it.  After the group the
 % footnote counter is reset and the title macros are emptied or \relax'ed.
 \def\maketitle{\par
 \begingroup
   \def\thefootnote{\fnsymbol{footnote}}
   \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author
                                                        % name centering
 %   The footnote-mark was overlapping the footnote-text,
 %   added the following to fix this problem               (MK)
   \long\def\@makefntext##1{\parindent 1em\noindent
                            \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1}
   \@maketitle \@thanks
 \endgroup
 % Body footnotes start again from zero.
 \setcounter{footnote}{0}
 % Make any later \maketitle a no-op and drop the stored title data.
 \let\maketitle\relax \let\@maketitle\relax
 \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
 % The toptitlebar has been raised to top-justify the first page
 % fancyhdr is already required near the top of this file, so this repeat is
 % a no-op; it is written with \RequirePackage, the form meant for style files.
 \RequirePackage{fancyhdr}
 % Use the fancy page style and clear every header field; \@maketitle fills
 % in the left header later.
 \pagestyle{fancy}
 \fancyhead{}
 % Title (includes both anonymized and non-anonymized versions)
 % \@maketitle lays the title block out in a \vbox of width \textwidth: the
 % title in \LARGE small caps, followed by either the real author block
 % (\ificlrfinal true) or an anonymous placeholder.  Each branch also sets
 % the left running head and defines \And / \AND, which close the current
 % author tabular and open the next one.
 \def\@maketitle{\vbox{\hsize\textwidth
 %\linewidth\hsize \vskip 0.1in \toptitlebar \centering
 {\LARGE\sc \@title\par}
 %\bottomtitlebar % \vskip 0.1in %  minus
 \ificlrfinal
 % Camera-ready branch: real authors are printed.
    \lhead{Published as a conference paper at ICLR 2026}
 % \And uses \linebreak[0] (a break is allowed but not requested).
    \def\And{\end{tabular}\hfil\linebreak[0]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
 % \AND uses \linebreak[4] (a break is forced).
  \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
    \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}%
 \else
 % Submission branch: \@author is not used; a fixed anonymous block is set.
       \lhead{Under review as a conference paper at ICLR 2026}
   \def\And{\end{tabular}\hfil\linebreak[0]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
  \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
            \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
    \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}Anonymous authors\\Paper under double-blind review\end{tabular}%
 \fi
 \vskip 0.3in minus 0.1in}}
 % abstract: a centred, \large small-caps ``Abstract'' heading followed by
 % the body set inside a quote environment.
 \renewenvironment{abstract}{\vskip.075in\centerline{\large\sc
 Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
 % sections with less space
 % Sectioning commands, all built on
 %   \@startsection{<name>}{<level>}{<indent>}{<beforeskip>}{<afterskip>}{<style>}.
 % A negative <beforeskip> suppresses the indentation of the paragraph after
 % the heading; a negative <afterskip> makes a run-in heading followed by
 % that much horizontal space.
 % \section: \large small caps, ragged right.
 \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
    -0.5ex minus -.2ex}{1.5ex plus 0.3ex
 minus0.2ex}{\large\sc\raggedright}}
 % \subsection: \normalsize small caps, ragged right.
 \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
 -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\sc\raggedright}}
 % \subsubsection: same style as \subsection with slightly smaller skips.
 \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex
 plus      -0.5ex minus -.2ex}{0.5ex plus
 .2ex}{\normalsize\sc\raggedright}}
 % \paragraph: run-in heading in bold, followed by 1em of space.
 \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
 % \subparagraph: run-in heading in small caps, followed by 1em of space.
 \def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
  0.5ex minus .2ex}{-1em}{\normalsize\sc}}
 % NOTE(review): \subsubsubsection does not use \@startsection and takes no
 % argument; it adds a 5pt skip, and its font and \raggedright switches are
 % scoped to the braces.  Confirm whether this is intended.
 \def\subsubsubsection{\vskip
 5pt{\noindent\normalsize\rm\raggedright}}
 % Footnotes
 \footnotesep 6.65pt %
 \skip\footins 9pt plus 4pt minus 2pt
 \def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt }
 \setcounter{footnote}{0}
 % Lists and paragraphs
 \parindent 0pt
 \topsep 4pt plus 1pt minus 2pt
 \partopsep 1pt plus 0.5pt minus 0.5pt
 \itemsep 2pt plus 1pt minus 0.5pt
 \parsep 2pt plus 1pt minus 0.5pt
 \parskip .5pc
 %\leftmargin2em
 \leftmargin3pc
 \leftmargini\leftmargin \leftmarginii 2em
 \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
 %\labelsep \labelsep 5pt
 % Per-level list parameter macros \@listi ... \@listvi.  Every level sets
 % \leftmargin from the matching \leftmargin<level>.  Levels ii-vi also set
 % \labelwidth to that margin minus \labelsep, and levels ii and iii tighten
 % the vertical spacing.
 \def\@listi{\leftmargin\leftmargini}
 \def\@listii{\leftmargin\leftmarginii
   \labelwidth\leftmarginii\advance\labelwidth-\labelsep
   \topsep 2pt plus 1pt minus 0.5pt
   \parsep 1pt plus 0.5pt minus 0.5pt
   \itemsep \parsep}
 \def\@listiii{\leftmargin\leftmarginiii
    \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
    \topsep 1pt plus 0.5pt minus 0.5pt
    \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
    \itemsep \topsep}
 \def\@listiv{\leftmargin\leftmarginiv
     \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
 \def\@listv{\leftmargin\leftmarginv
     \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
 % \leftmarginvi is not assigned in this style file; presumably the value
 % from the document class is used -- confirm.
 \def\@listvi{\leftmargin\leftmarginvi
     \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
 \abovedisplayskip 7pt plus2pt minus5pt%
 \belowdisplayskip \abovedisplayskip
 \abovedisplayshortskip  0pt plus3pt%
 \belowdisplayshortskip  4pt plus3pt minus3pt%
 % Less leading in most fonts (due to the narrow columns)
 % The choices were between 1-pt and 1.5-pt leading
 %\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} % got rid of @ (MK)
 % Font-size commands, redefined through \@setsize.  The braced length is
 % the baseline skip paired with each size (e.g. 11pt for \normalsize).
 % \small and \footnotesize are defined identically here.
 \def\normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
 \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
 \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
 \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
 \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
 \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
 \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
 \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
 \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
 \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
 % Horizontal rules intended for above (4pt thick) and below (1pt thick) the
 % title.  In this file they are referenced only from commented-out lines of
 % \@maketitle.
 \def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip}
 \def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip
 .09in} %
 %Reduced second vskip to compensate for adding the strut in \@author
 %% % Vertical Ruler
 %% % This code is, largely, from the CVPR 2010 conference style file
 %% % ----- define vruler
 \makeatletter
 \newbox\iclrrulerbox
 \newcount\iclrrulercount
 \newdimen\iclrruleroffset
 \newdimen\cv@lineheight
 \newdimen\cv@boxheight
 \newbox\cv@tmpbox
 \newcount\cv@refno
 \newcount\cv@tot
 % NUMBER with left flushed zeros  \fillzeros[<WIDTH>]<NUMBER>
 \newcount\cv@tmpc@ \newcount\cv@tmpc
 % \fillzeros[<WIDTH>]<NUMBER>: typeset <NUMBER> left-padded with zeros to
 % <WIDTH> characters (a leading minus sign counts towards the width).
 %   1. \cv@tmpc@ := |<NUMBER>| and \cv@tmpc := its number of decimal digits.
 %      The first \loop divides by 10 until the value drops below 10; an
 %      exact 10 is bumped to 11 so that the `>10' loop test still iterates.
 %   2. For a negative <NUMBER>, count one more character and emit `-'.
 %   3. The second \loop emits `0' until \cv@tmpc reaches <WIDTH>.
 %   4. Finally |<NUMBER>| itself is printed.
 \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
 \cv@tmpc=1 %
 \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
   \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
 \ifnum#2<0\advance\cv@tmpc1\relax-\fi
 \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
 \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
 % \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
 % Builds the global box \iclrrulerbox: a \vbox of height <HEIGHT> holding a
 % column of zero-width \hbox'es.  Each box is forced to height <SCALE> and
 % depth 0pt and shows the running \iclrrulercount, zero-padded to <DIGITS>
 % by \fillzeros and set in \iclrtenhv.  The counter is set globally to
 % <INITIAL_COUNT> and advanced globally by <STEP> per line, so its final
 % value survives the enclosing group.  The loop body runs \cv@tot-1 times,
 % where \cv@tot = <HEIGHT>/<SCALE> + 2 (integer division).
 \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip
 \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
 \global\setbox\iclrrulerbox=\vbox to \textheight{%
 {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
 \cv@lineheight=#1\global\iclrrulercount=#2%
 \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
 \cv@refno1\vskip-\cv@lineheight\vskip1ex%
 \loop\setbox\cv@tmpbox=\hbox to0cm{{\iclrtenhv\hfil\fillzeros[#4]\iclrrulercount}}%
 \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
 \advance\cv@refno1\global\advance\iclrrulercount#3\relax
 \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}%
 \makeatother
 % ----- end of vruler
 % \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
 % \iclrruler{<INITIAL_COUNT>}: build the ruler with 12pt line spacing,
 % step 1, 3-digit labels and height 0.993\textheight, then typeset the box.
 \def\iclrruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\iclrrulerbox}}
 % Shipout hook: unless \iclrfinalcopy was issued, draw the ruler in light
 % gray 35pt to the left of the text block's upper-left corner, lowered by
 % \textheight-3.7pt.  Passing the current \iclrrulercount as the start value
 % lets the numbering continue from where the previous ruler stopped.
 % NOTE(review): \color is used here but this file loads no colour package
 % itself; presumably one is provided by another loaded package -- confirm.
 \AddToShipoutPicture{%
 \ificlrfinal\else
 \iclrruleroffset=\textheight
 \advance\iclrruleroffset by -3.7pt
  \color[rgb]{.7,.7,.7}
  \AtTextUpperLeft{%
    \put(\LenToUnit{-35pt},\LenToUnit{-\iclrruleroffset}){%left ruler
      \iclrruler{\iclrrulercount}}
  }
 \fi
 }
 % %% To add a vertical bar on the side
 % \AddToShipoutPicture{
 % \AtTextLowerLeft{
 % \hspace*{-1.8cm}
 % \colorbox[rgb]{0.7,0.7,0.7}{\small \parbox[b][\textheight]{0.1cm}{}}}
 % }
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.tex
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/iclr2026_conference.tex
@ -0,0 +1,414 @@
 \documentclass{article} % For LaTeX2e
 \usepackage{iclr2026_conference,times}
 % Optional math commands from https://github.com/goodfeli/dlbook_notation.
 \input{math_commands.tex}
 \usepackage{hyperref}
 \usepackage{url}
 \title{Formatting Instructions for ICLR 2026 \\ Conference Submissions}
 % Authors must not appear in the submitted version. They should be hidden
 % as long as the \iclrfinalcopy macro remains commented out below.
 % Non-anonymous submissions will be rejected without review.
 \author{Antiquus S.~Hippocampus, Natalia Cerebro \& Amelie P. Amygdale \thanks{ Use footnote for providing further information
 about author (webpage, alternative address)---\emph{not} for acknowledging
 funding agencies.  Funding acknowledgements go at the end of the paper.} \\
 Department of Computer Science\\
 Cranberry-Lemon University\\
 Pittsburgh, PA 15213, USA \\
 \texttt{\{hippo,brain,jen\}@cs.cranberry-lemon.edu} \\
 \And
 Ji Q. Ren \& Yevgeny LeNet \\
 Department of Computational Neuroscience \\
 University of the Witwatersrand \\
 Joburg, South Africa \\
 \texttt{\{robot,net\}@wits.ac.za} \\
 \AND
 Coauthor \\
 Affiliation \\
 Address \\
 \texttt{email}
 }
 % The \author macro works with any number of authors. There are two commands
 % used to separate the names and addresses of multiple authors: \And and \AND.
 %
 % Using \And between authors leaves it to \LaTeX{} to determine where to break
 % the lines. Using \AND forces a linebreak at that point. So, if \LaTeX{}
 % puts 3 of 4 authors names on the first line, and the last on the second
 % line, try using \AND instead of \And before the third author name.
 \newcommand{\fix}{\marginpar{FIX}}
 \newcommand{\new}{\marginpar{NEW}}
 %\iclrfinalcopy % Uncomment for camera-ready version, but NOT for submission.
 \begin{document}
 \maketitle
 \begin{abstract}
 The abstract paragraph should be indented 1/2~inch (3~picas) on both left and
 right-hand margins. Use 10~point type, with a vertical spacing of 11~points.
 The word \textsc{Abstract} must be centered, in small caps, and in point size 12. Two
 line spaces precede the abstract. The abstract must be limited to one
 paragraph.
 \end{abstract}
 \section{Submission of conference papers to ICLR 2026}
 ICLR requires electronic submissions, processed by
 \url{https://openreview.net/}. See ICLR's website for more instructions.
 If your paper is ultimately accepted, the statement {\tt
  {\textbackslash}iclrfinalcopy} should be inserted to adjust the
 format to the camera ready requirements.
 The format for the submissions is a variant of the NeurIPS format.
 Please read carefully the instructions below, and follow them
 faithfully.
 \subsection{Style}
 Papers to be submitted to ICLR 2026 must be prepared according to the
 instructions presented here.
 %% Please note that we have introduced automatic line number generation
 %% into the style file for \LaTeXe. This is to help reviewers
 %% refer to specific lines of the paper when they make their comments. Please do
 %% NOT refer to these line numbers in your paper as they will be removed from the
 %% style file for the final version of accepted papers.
 Authors are required to use the ICLR \LaTeX{} style files obtainable at the
 ICLR website. Please make sure you use the current files and
 not previous versions. Tweaking the style files may be grounds for rejection.
 \subsection{Retrieval of style files}
 The style files for ICLR and other conference information are available online at:
 \begin{center}
   \url{http://www.iclr.cc/}
 \end{center}
 The file \verb+iclr2026_conference.pdf+ contains these
 instructions and illustrates the
 various formatting requirements your ICLR paper must satisfy.
 Submissions must be made using \LaTeX{} and the style files
 \verb+iclr2026_conference.sty+ and \verb+iclr2026_conference.bst+ (to be used with \LaTeX{}2e). The file
 \verb+iclr2026_conference.tex+ may be used as a ``shell'' for writing your paper. All you
 have to do is replace the author, title, abstract, and text of the paper with
 your own.
 The formatting instructions contained in these style files are summarized in
 sections \ref{gen_inst}, \ref{headings}, and \ref{others} below.
 \section{General formatting instructions}
 \label{gen_inst}
 The text must be confined within a rectangle 5.5~inches (33~picas) wide and
 9~inches (54~picas) long. The left margin is 1.5~inches (9~picas).
 Use 10~point type with a vertical spacing of 11~points. Times New Roman is the
 preferred typeface throughout. Paragraphs are separated by 1/2~line space,
 with no indentation.
 Paper title is 17~point, in small caps and left-aligned.
 All pages should start at 1~inch (6~picas) from the top of the page.
 Authors' names are
 set in boldface, and each name is placed above its corresponding
 address. The lead author's name is to be listed first, and
 the co-authors' names are set to follow. Authors sharing the
 same address can be on the same line.
 Please pay special attention to the instructions in section \ref{others}
 regarding figures, tables, acknowledgments, and references.
 There will be a strict upper limit of \textbf{9 pages} for the main text of the initial submission, with unlimited additional pages for citations. This limit will be expanded to \textbf{10 pages} for rebuttal/camera ready.
 \section{Headings: first level}
 \label{headings}
 First level headings are in small caps,
 flush left and in point size 12. One line space before the first level
 heading and 1/2~line space after the first level heading.
 \subsection{Headings: second level}
 Second level headings are in small caps,
 flush left and in point size 10. One line space before the second level
 heading and 1/2~line space after the second level heading.
 \subsubsection{Headings: third level}
 Third level headings are in small caps,
 flush left and in point size 10. One line space before the third level
 heading and 1/2~line space after the third level heading.
 \section{Citations, figures, tables, references}
 \label{others}
 These instructions apply to everyone, regardless of the formatter being used.
 \subsection{Citations within the text}
 Citations within the text should be based on the \texttt{natbib} package
 and include the authors' last names and year (with the ``et~al.'' construct
 for more than two authors). When the authors or the publication are
 included in the sentence, the citation should not be in parentheses using \verb|\citet{}| (as
 in ``See \citet{Hinton06} for more information.''). Otherwise, the citation
 should be in parentheses using \verb|\citep{}| (as in ``Deep learning shows promise to make progress
 towards AI~\citep{Bengio+chapter2007}.'').
 The corresponding references are to be listed in alphabetical order of
 authors, in the \textsc{References} section. As to the format of the
 references themselves, any style is acceptable as long as it is used
 consistently.
 \subsection{Footnotes}
 Indicate footnotes with a number\footnote{Sample of the first footnote} in the
 text. Place the footnotes at the bottom of the page on which they appear.
 Precede the footnote with a horizontal rule of 2~inches
 (12~picas).\footnote{Sample of the second footnote}
 \subsection{Figures}
 All artwork must be neat, clean, and legible. Lines should be dark
 enough for purposes of reproduction; art work should not be
 hand-drawn. The figure number and caption always appear after the
 figure. Place one line space before the figure caption, and one line
 space after the figure. The figure caption is lower case (except for
 first word and proper nouns); figures are numbered consecutively.
 Make sure the figure caption does not get separated from the figure.
 Leave sufficient space to avoid splitting the figure and figure caption.
 You may use color figures.
 However, it is best for the
 figure captions and the paper body to make sense if the paper is printed
 either in black/white or in color.
 \begin{figure}[h]
 \begin{center}
 %\framebox[4.0in]{$\;$}
 \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
 \end{center}
 \caption{Sample figure caption.}
 \end{figure}
 \subsection{Tables}
 All tables must be centered, neat, clean and legible. Do not use hand-drawn
 tables. The table number and title always appear before the table. See
 Table~\ref{sample-table}.
 Place one line space before the table title, one line space after the table
 title, and one line space after the table. The table title must be lower case
 (except for first word and proper nouns); tables are numbered consecutively.
 \begin{table}[t]
 \caption{Sample table title}
 \label{sample-table}
 \begin{center}
 \begin{tabular}{ll}
 \multicolumn{1}{c}{\bf PART}  &\multicolumn{1}{c}{\bf DESCRIPTION}
 \\ \hline \\
 Dendrite         &Input terminal \\
 Axon             &Output terminal \\
 Soma             &Cell body (contains cell nucleus) \\
 \end{tabular}
 \end{center}
 \end{table}
 \section{Default Notation}
 In an attempt to encourage standardized notation, we have included the
 notation file from the textbook, \textit{Deep Learning}
 \citep{goodfellow2016deep} available at
 \url{https://github.com/goodfeli/dlbook_notation/}.  Use of this style
 is not required and can be disabled by commenting out
 \texttt{math\_commands.tex}.
 \centerline{\bf Numbers and Arrays}
 \bgroup
 \def\arraystretch{1.5}
 \begin{tabular}{p{1in}p{3.25in}}
 $\displaystyle a$ & A scalar (integer or real)\\
 $\displaystyle \va$ & A vector\\
 $\displaystyle \mA$ & A matrix\\
 $\displaystyle \tA$ & A tensor\\
 $\displaystyle \mI_n$ & Identity matrix with $n$ rows and $n$ columns\\
 $\displaystyle \mI$ & Identity matrix with dimensionality implied by context\\
 $\displaystyle \ve^{(i)}$ & Standard basis vector $[0,\dots,0,1,0,\dots,0]$ with a 1 at position $i$\\
 $\displaystyle \text{diag}(\va)$ & A square, diagonal matrix with diagonal entries given by $\va$\\
 $\displaystyle \ra$ & A scalar random variable\\
 $\displaystyle \rva$ & A vector-valued random variable\\
 $\displaystyle \rmA$ & A matrix-valued random variable\\
 \end{tabular}
 \egroup
 \vspace{0.25cm}
 \centerline{\bf Sets and Graphs}
 \bgroup
 \def\arraystretch{1.5}
 \begin{tabular}{p{1.25in}p{3.25in}}
 $\displaystyle \sA$ & A set\\
 $\displaystyle \R$ & The set of real numbers \\
 $\displaystyle \{0, 1\}$ & The set containing 0 and 1 \\
 $\displaystyle \{0, 1, \dots, n \}$ & The set of all integers between $0$ and $n$\\
 $\displaystyle [a, b]$ & The real interval including $a$ and $b$\\
 $\displaystyle (a, b]$ & The real interval excluding $a$ but including $b$\\
 $\displaystyle \sA \backslash \sB$ & Set subtraction, i.e., the set containing the elements of $\sA$ that are not in $\sB$\\
 $\displaystyle \gG$ & A graph\\
 $\displaystyle \parents_\gG(\ervx_i)$ & The parents of $\ervx_i$ in $\gG$
 \end{tabular}
 \egroup
 \vspace{0.25cm}
 \centerline{\bf Indexing}
 \bgroup
 \def\arraystretch{1.5}
 \begin{tabular}{p{1.25in}p{3.25in}}
 $\displaystyle \eva_i$ & Element $i$ of vector $\va$, with indexing starting at 1 \\
 $\displaystyle \eva_{-i}$ & All elements of vector $\va$ except for element $i$ \\
 $\displaystyle \emA_{i,j}$ & Element $i, j$ of matrix $\mA$ \\
 $\displaystyle \mA_{i, :}$ & Row $i$ of matrix $\mA$ \\
 $\displaystyle \mA_{:, i}$ & Column $i$ of matrix $\mA$ \\
 $\displaystyle \etA_{i, j, k}$ & Element $(i, j, k)$ of a 3-D tensor $\tA$\\
 $\displaystyle \tA_{:, :, i}$ & 2-D slice of a 3-D tensor\\
 $\displaystyle \erva_i$ & Element $i$ of the random vector $\rva$ \\
 \end{tabular}
 \egroup
 \vspace{0.25cm}
 \centerline{\bf Calculus}
 \bgroup
 \def\arraystretch{1.5}
 \begin{tabular}{p{1.25in}p{3.25in}}
 % NOTE: the [2ex] on the next line adds extra height to that row of the table.
 % Without that command, the fraction on the first line is too tall and collides
 % with the fraction on the second line.
 $\displaystyle\frac{d y} {d x}$ & Derivative of $y$ with respect to $x$\\ [2ex]
 $\displaystyle \frac{\partial y} {\partial x} $ & Partial derivative of $y$ with respect to $x$ \\
 $\displaystyle \nabla_\vx y $ & Gradient of $y$ with respect to $\vx$ \\
 $\displaystyle \nabla_\mX y $ & Matrix derivatives of $y$ with respect to $\mX$ \\
 $\displaystyle \nabla_\tX y $ & Tensor containing derivatives of $y$ with respect to $\tX$ \\
 $\displaystyle \frac{\partial f}{\partial \vx} $ & Jacobian matrix $\mJ \in \R^{m\times n}$ of $f: \R^n \rightarrow \R^m$\\
 $\displaystyle \nabla_\vx^2 f(\vx)\text{ or }\mH( f)(\vx)$ & The Hessian matrix of $f$ at input point $\vx$\\
 $\displaystyle \int f(\vx) d\vx $ & Definite integral over the entire domain of $\vx$ \\
 $\displaystyle \int_\sS f(\vx) d\vx$ & Definite integral with respect to $\vx$ over the set $\sS$ \\
 \end{tabular}
 \egroup
 \vspace{0.25cm}
 \centerline{\bf Probability and Information Theory}
 \bgroup
 \def\arraystretch{1.5}
 \begin{tabular}{p{1.25in}p{3.25in}}
 $\displaystyle P(\ra)$ & A probability distribution over a discrete variable\\
 $\displaystyle p(\ra)$ & A probability distribution over a continuous variable, or over
 a variable whose type has not been specified\\
 $\displaystyle \ra \sim P$ & Random variable $\ra$ has distribution $P$\\% so thing on left of \sim should always be a random variable, with name beginning with \r
 $\displaystyle  \E_{\rx\sim P} [ f(x) ]\text{ or } \E f(x)$ & Expectation of $f(x)$ with respect to $P(\rx)$ \\
 $\displaystyle \Var(f(x)) $ &  Variance of $f(x)$ under $P(\rx)$ \\
 $\displaystyle \Cov(f(x),g(x)) $ & Covariance of $f(x)$ and $g(x)$ under $P(\rx)$\\
 $\displaystyle H(\rx) $ & Shannon entropy of the random variable $\rx$\\
 $\displaystyle \KL ( P \Vert Q ) $ & Kullback-Leibler divergence of $P$ and $Q$ \\
 $\displaystyle \mathcal{N} ( \vx ; \vmu , \mSigma)$ & Gaussian distribution %
 over $\vx$ with mean $\vmu$ and covariance $\mSigma$ \\
 \end{tabular}
 \egroup
 \vspace{0.25cm}
 \centerline{\bf Functions}
 \bgroup
 \def\arraystretch{1.5}
 \begin{tabular}{p{1.25in}p{3.25in}}
 $\displaystyle f: \sA \rightarrow \sB$ & The function $f$ with domain $\sA$ and range $\sB$\\
 $\displaystyle f \circ g $ & Composition of the functions $f$ and $g$ \\
  $\displaystyle f(\vx ; \vtheta) $ & A function of $\vx$ parametrized by $\vtheta$.
  (Sometimes we write $f(\vx)$ and omit the argument $\vtheta$ to lighten notation) \\
 $\displaystyle \log x$ & Natural logarithm of $x$ \\
 $\displaystyle \sigma(x)$ & Logistic sigmoid, $\displaystyle \frac{1} {1 + \exp(-x)}$ \\
 $\displaystyle \zeta(x)$ & Softplus, $\log(1 + \exp(x))$ \\
 $\displaystyle || \vx ||_p $ & $\normlp$ norm of $\vx$ \\
 $\displaystyle || \vx || $ & $\normltwo$ norm of $\vx$ \\
 $\displaystyle x^+$ & Positive part of $x$, i.e., $\max(0,x)$\\
 $\displaystyle \1_\mathrm{condition}$ & is 1 if the condition is true, 0 otherwise\\
 \end{tabular}
 \egroup
 \vspace{0.25cm}
 \section{Final instructions}
 Do not change any aspects of the formatting parameters in the style files.
 In particular, do not modify the width or length of the rectangle the text
 should fit into, and do not change font sizes (except perhaps in the
 \textsc{References} section; see below). Please note that pages should be
 numbered.
 \section{Preparing PostScript or PDF files}
 Please prepare PostScript or PDF files with paper size ``US Letter'', and
 not, for example, ``A4''. The -t
 letter option on dvips will produce US Letter files.
 Consider directly generating PDF files using \verb+pdflatex+
 (especially if you are a MiKTeX user).
 PDF figures must be substituted for EPS figures, however.
 Otherwise, please generate your PostScript and PDF files with the following commands:
 \begin{verbatim}
 dvips mypaper.dvi -t letter -Ppdf -G0 -o mypaper.ps
 ps2pdf mypaper.ps mypaper.pdf
 \end{verbatim}
 \subsection{Margins in LaTeX}
 Most of the margin problems come from figures positioned by hand using
 \verb+\special+ or other commands. We suggest using the command
 \verb+\includegraphics+
 from the graphicx package. Always specify the figure width as a multiple of
 the line width as in the example below using .eps graphics
 \begin{verbatim}
   \usepackage[dvips]{graphicx} ...
   \includegraphics[width=0.8\linewidth]{myfile.eps}
 \end{verbatim}
 or % Apr 2009 addition
 \begin{verbatim}
   \usepackage[pdftex]{graphicx} ...
   \includegraphics[width=0.8\linewidth]{myfile.pdf}
 \end{verbatim}
 for .pdf graphics.
 See section~4.4 in the graphics bundle documentation (\url{http://www.ctan.org/tex-archive/macros/latex/required/graphics/grfguide.ps}).
 A number of width problems arise when LaTeX cannot properly hyphenate a
 line. Please give LaTeX hyphenation hints using the \verb+\-+ command.
 \subsubsection*{Author Contributions}
 If you'd like to, you may include  a section for author contributions as is done
 in many journals. This is optional and at the discretion of the authors.
 \subsubsection*{Acknowledgments}
 Use unnumbered third level headings for the acknowledgments. All
 acknowledgments, including those to funding agencies, go at the end of the paper.
 \bibliography{iclr2026_conference}
 \bibliographystyle{iclr2026_conference}
 \appendix
 \section{Appendix}
 You may include other additional sections here.
 \end{document}
--- a/skills/mlops/ml-paper-writing/templates/iclr2026/math_commands.tex
+++ b/skills/mlops/ml-paper-writing/templates/iclr2026/math_commands.tex
@ -0,0 +1,508 @@
 %%%%% NEW MATH DEFINITIONS %%%%%
 \usepackage{amsmath,amsfonts,bm}
 % Mark sections of captions for referring to divisions of figures
 \newcommand{\figleft}{{\em (Left)}}
 \newcommand{\figcenter}{{\em (Center)}}
 \newcommand{\figright}{{\em (Right)}}
 \newcommand{\figtop}{{\em (Top)}}
 \newcommand{\figbottom}{{\em (Bottom)}}
 \newcommand{\captiona}{{\em (a)}}
 \newcommand{\captionb}{{\em (b)}}
 \newcommand{\captionc}{{\em (c)}}
 \newcommand{\captiond}{{\em (d)}}
 % Highlight a newly defined term (set in bold).
 % \textbf is used instead of the obsolete {\bf ...} switch, which is not
 % provided by every document class.
 \newcommand{\newterm}[1]{\textbf{#1}}
 % Figure reference, lower-case.
 \def\figref#1{figure~\ref{#1}}
 % Figure reference, capital. For start of sentence
 \def\Figref#1{Figure~\ref{#1}}
 \def\twofigref#1#2{figures \ref{#1} and \ref{#2}}
 \def\quadfigref#1#2#3#4{figures \ref{#1}, \ref{#2}, \ref{#3} and \ref{#4}}
 % Section reference, lower-case.
 \def\secref#1{section~\ref{#1}}
 % Section reference, capital.
 \def\Secref#1{Section~\ref{#1}}
 % Reference to two sections.
 \def\twosecrefs#1#2{sections \ref{#1} and \ref{#2}}
 % Reference to three sections.
 \def\secrefs#1#2#3{sections \ref{#1}, \ref{#2} and \ref{#3}}
 % Reference to an equation, lower-case.
 % NOTE: amsmath is loaded at the top of this file and already provides
 % \eqref; this \def replaces that definition, so \eqref{x} expands to
 % "equation~\ref{x}" in documents that include this file.
 \def\eqref#1{equation~\ref{#1}}
 % Reference to an equation, upper case
 \def\Eqref#1{Equation~\ref{#1}}
 % A raw reference to an equation---avoid using if possible
 % (expands to the bare \ref with no leading word).
 \def\plaineqref#1{\ref{#1}}
 % Reference to a chapter, lower-case.
 \def\chapref#1{chapter~\ref{#1}}
 % Reference to a chapter, upper case.
 \def\Chapref#1{Chapter~\ref{#1}}
 % Reference to a range of chapters, printed as "chapters~M--N".
 % The tie separates the word from the first number and keeps them on one line.
 \def\rangechapref#1#2{chapters~\ref{#1}--\ref{#2}}
 % Reference to an algorithm, lower-case.
 \def\algref#1{algorithm~\ref{#1}}
 % Reference to an algorithm, upper case.
 \def\Algref#1{Algorithm~\ref{#1}}
 \def\twoalgref#1#2{algorithms \ref{#1} and \ref{#2}}
 \def\Twoalgref#1#2{Algorithms \ref{#1} and \ref{#2}}
 % Reference to a part, lower case
 \def\partref#1{part~\ref{#1}}
 % Reference to a part, upper case
 \def\Partref#1{Part~\ref{#1}}
 \def\twopartref#1#2{parts \ref{#1} and \ref{#2}}
 \def\ceil#1{\lceil #1 \rceil}
 \def\floor#1{\lfloor #1 \rfloor}
 \def\1{\bm{1}}
 % Calligraphic-D symbols: plain, and with upright "valid" / "test" subscripts.
 \newcommand{\train}{\mathcal{D}}
 % The subscript is attached outside \mathcal so that only the D is passed
 % to the calligraphic alphabet.
 \newcommand{\valid}{\mathcal{D}_{\mathrm{valid}}}
 \newcommand{\test}{\mathcal{D}_{\mathrm{test}}}
 \def\eps{{\epsilon}}
 % Random variables
 \def\reta{{\textnormal{$\eta$}}}
 \def\ra{{\textnormal{a}}}
 \def\rb{{\textnormal{b}}}
 \def\rc{{\textnormal{c}}}
 \def\rd{{\textnormal{d}}}
 \def\re{{\textnormal{e}}}
 \def\rf{{\textnormal{f}}}
 \def\rg{{\textnormal{g}}}
 \def\rh{{\textnormal{h}}}
 \def\ri{{\textnormal{i}}}
 \def\rj{{\textnormal{j}}}
 \def\rk{{\textnormal{k}}}
 \def\rl{{\textnormal{l}}}
 % rm is already a command, just don't name any random variables m
 \def\rn{{\textnormal{n}}}
 \def\ro{{\textnormal{o}}}
 \def\rp{{\textnormal{p}}}
 \def\rq{{\textnormal{q}}}
 \def\rr{{\textnormal{r}}}
 \def\rs{{\textnormal{s}}}
 \def\rt{{\textnormal{t}}}
 \def\ru{{\textnormal{u}}}
 \def\rv{{\textnormal{v}}}
 \def\rw{{\textnormal{w}}}
 \def\rx{{\textnormal{x}}}
 \def\ry{{\textnormal{y}}}
 \def\rz{{\textnormal{z}}}
 % Random vectors: \rv<letter> typesets <letter> with \mathbf.
 % NOTE(review): \mathbf usually has no effect on lowercase Greek with the
 % default math fonts, so \rvepsilon and \rvtheta may not render bold -- confirm.
 \def\rvepsilon{{\mathbf{\epsilon}}}
 \def\rvtheta{{\mathbf{\theta}}}
 \def\rva{{\mathbf{a}}}
 \def\rvb{{\mathbf{b}}}
 \def\rvc{{\mathbf{c}}}
 \def\rvd{{\mathbf{d}}}
 \def\rve{{\mathbf{e}}}
 \def\rvf{{\mathbf{f}}}
 \def\rvg{{\mathbf{g}}}
 \def\rvh{{\mathbf{h}}}
 % Previously misnamed \rvu, which left \rvi undefined and was overwritten
 % by the real \rvu definition below.
 \def\rvi{{\mathbf{i}}}
 \def\rvj{{\mathbf{j}}}
 \def\rvk{{\mathbf{k}}}
 \def\rvl{{\mathbf{l}}}
 \def\rvm{{\mathbf{m}}}
 \def\rvn{{\mathbf{n}}}
 \def\rvo{{\mathbf{o}}}
 \def\rvp{{\mathbf{p}}}
 \def\rvq{{\mathbf{q}}}
 \def\rvr{{\mathbf{r}}}
 \def\rvs{{\mathbf{s}}}
 \def\rvt{{\mathbf{t}}}
 \def\rvu{{\mathbf{u}}}
 \def\rvv{{\mathbf{v}}}
 \def\rvw{{\mathbf{w}}}
 \def\rvx{{\mathbf{x}}}
 \def\rvy{{\mathbf{y}}}
 \def\rvz{{\mathbf{z}}}
 % Elements of random vectors
 \def\erva{{\textnormal{a}}}
 \def\ervb{{\textnormal{b}}}
 \def\ervc{{\textnormal{c}}}
 \def\ervd{{\textnormal{d}}}
 \def\erve{{\textnormal{e}}}
 \def\ervf{{\textnormal{f}}}
 \def\ervg{{\textnormal{g}}}
 \def\ervh{{\textnormal{h}}}
 \def\ervi{{\textnormal{i}}}
 \def\ervj{{\textnormal{j}}}
 \def\ervk{{\textnormal{k}}}
 \def\ervl{{\textnormal{l}}}
 \def\ervm{{\textnormal{m}}}
 \def\ervn{{\textnormal{n}}}
 \def\ervo{{\textnormal{o}}}
 \def\ervp{{\textnormal{p}}}
 \def\ervq{{\textnormal{q}}}
 \def\ervr{{\textnormal{r}}}
 \def\ervs{{\textnormal{s}}}
 \def\ervt{{\textnormal{t}}}
 \def\ervu{{\textnormal{u}}}
 \def\ervv{{\textnormal{v}}}
 \def\ervw{{\textnormal{w}}}
 \def\ervx{{\textnormal{x}}}
 \def\ervy{{\textnormal{y}}}
 \def\ervz{{\textnormal{z}}}
 % Random matrices
 \def\rmA{{\mathbf{A}}}
 \def\rmB{{\mathbf{B}}}
 \def\rmC{{\mathbf{C}}}
 \def\rmD{{\mathbf{D}}}
 \def\rmE{{\mathbf{E}}}
 \def\rmF{{\mathbf{F}}}
 \def\rmG{{\mathbf{G}}}
 \def\rmH{{\mathbf{H}}}
 \def\rmI{{\mathbf{I}}}
 \def\rmJ{{\mathbf{J}}}
 \def\rmK{{\mathbf{K}}}
 \def\rmL{{\mathbf{L}}}
 \def\rmM{{\mathbf{M}}}
 \def\rmN{{\mathbf{N}}}
 \def\rmO{{\mathbf{O}}}
 \def\rmP{{\mathbf{P}}}
 \def\rmQ{{\mathbf{Q}}}
 \def\rmR{{\mathbf{R}}}
 \def\rmS{{\mathbf{S}}}
 \def\rmT{{\mathbf{T}}}
 \def\rmU{{\mathbf{U}}}
 \def\rmV{{\mathbf{V}}}
 \def\rmW{{\mathbf{W}}}
 \def\rmX{{\mathbf{X}}}
 \def\rmY{{\mathbf{Y}}}
 \def\rmZ{{\mathbf{Z}}}
 % Elements of random matrices
 \def\ermA{{\textnormal{A}}}
 \def\ermB{{\textnormal{B}}}
 \def\ermC{{\textnormal{C}}}
 \def\ermD{{\textnormal{D}}}
 \def\ermE{{\textnormal{E}}}
 \def\ermF{{\textnormal{F}}}
 \def\ermG{{\textnormal{G}}}
 \def\ermH{{\textnormal{H}}}
 \def\ermI{{\textnormal{I}}}
 \def\ermJ{{\textnormal{J}}}
 \def\ermK{{\textnormal{K}}}
 \def\ermL{{\textnormal{L}}}
 \def\ermM{{\textnormal{M}}}
 \def\ermN{{\textnormal{N}}}
 \def\ermO{{\textnormal{O}}}
 \def\ermP{{\textnormal{P}}}
 \def\ermQ{{\textnormal{Q}}}
 \def\ermR{{\textnormal{R}}}
 \def\ermS{{\textnormal{S}}}
 \def\ermT{{\textnormal{T}}}
 \def\ermU{{\textnormal{U}}}
 \def\ermV{{\textnormal{V}}}
 \def\ermW{{\textnormal{W}}}
 \def\ermX{{\textnormal{X}}}
 \def\ermY{{\textnormal{Y}}}
 \def\ermZ{{\textnormal{Z}}}
 % Vectors
 \def\vzero{{\bm{0}}}
 \def\vone{{\bm{1}}}
 \def\vmu{{\bm{\mu}}}
 \def\vtheta{{\bm{\theta}}}
 \def\va{{\bm{a}}}
 \def\vb{{\bm{b}}}
 \def\vc{{\bm{c}}}
 \def\vd{{\bm{d}}}
 \def\ve{{\bm{e}}}
 \def\vf{{\bm{f}}}
 \def\vg{{\bm{g}}}
 \def\vh{{\bm{h}}}
 \def\vi{{\bm{i}}}
 \def\vj{{\bm{j}}}
 \def\vk{{\bm{k}}}
 \def\vl{{\bm{l}}}
 \def\vm{{\bm{m}}}
 \def\vn{{\bm{n}}}
 \def\vo{{\bm{o}}}
 \def\vp{{\bm{p}}}
 \def\vq{{\bm{q}}}
 \def\vr{{\bm{r}}}
 \def\vs{{\bm{s}}}
 \def\vt{{\bm{t}}}
 \def\vu{{\bm{u}}}
 \def\vv{{\bm{v}}}
 \def\vw{{\bm{w}}}
 \def\vx{{\bm{x}}}
 \def\vy{{\bm{y}}}
 \def\vz{{\bm{z}}}
 % Elements of vectors
 \def\evalpha{{\alpha}}
 \def\evbeta{{\beta}}
 \def\evepsilon{{\epsilon}}
 \def\evlambda{{\lambda}}
 \def\evomega{{\omega}}
 \def\evmu{{\mu}}
 \def\evpsi{{\psi}}
 \def\evsigma{{\sigma}}
 \def\evtheta{{\theta}}
 \def\eva{{a}}
 \def\evb{{b}}
 \def\evc{{c}}
 \def\evd{{d}}
 \def\eve{{e}}
 \def\evf{{f}}
 \def\evg{{g}}
 \def\evh{{h}}
 \def\evi{{i}}
 \def\evj{{j}}
 \def\evk{{k}}
 \def\evl{{l}}
 \def\evm{{m}}
 \def\evn{{n}}
 \def\evo{{o}}
 \def\evp{{p}}
 \def\evq{{q}}
 \def\evr{{r}}
 \def\evs{{s}}
 \def\evt{{t}}
 \def\evu{{u}}
 \def\evv{{v}}
 \def\evw{{w}}
 \def\evx{{x}}
 \def\evy{{y}}
 \def\evz{{z}}
 % Matrix
 \def\mA{{\bm{A}}}
 \def\mB{{\bm{B}}}
 \def\mC{{\bm{C}}}
 \def\mD{{\bm{D}}}
 \def\mE{{\bm{E}}}
 \def\mF{{\bm{F}}}
 \def\mG{{\bm{G}}}
 \def\mH{{\bm{H}}}
 \def\mI{{\bm{I}}}
 \def\mJ{{\bm{J}}}
 \def\mK{{\bm{K}}}
 \def\mL{{\bm{L}}}
 \def\mM{{\bm{M}}}
 \def\mN{{\bm{N}}}
 \def\mO{{\bm{O}}}
 \def\mP{{\bm{P}}}
 \def\mQ{{\bm{Q}}}
 \def\mR{{\bm{R}}}
 \def\mS{{\bm{S}}}
 \def\mT{{\bm{T}}}
 \def\mU{{\bm{U}}}
 \def\mV{{\bm{V}}}
 \def\mW{{\bm{W}}}
 \def\mX{{\bm{X}}}
 \def\mY{{\bm{Y}}}
 \def\mZ{{\bm{Z}}}
 \def\mBeta{{\bm{\beta}}}
 \def\mPhi{{\bm{\Phi}}}
 \def\mLambda{{\bm{\Lambda}}}
 \def\mSigma{{\bm{\Sigma}}}
 % Tensor
 % \mathsfit: math alphabet using the default encoding and default sans-serif
 % family, medium series, slanted shape in the normal math version.
 \DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl}
 % In the "bold" math version the same alphabet switches to bold-extended
 % series with upright (n) shape.
 \SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n}
 % \tens{X}: typeset X in \mathsfit wrapped in \bm (bm is loaded at the top
 % of this file).
 \newcommand{\tens}[1]{\bm{\mathsfit{#1}}}
 \def\tA{{\tens{A}}}
 \def\tB{{\tens{B}}}
 \def\tC{{\tens{C}}}
 \def\tD{{\tens{D}}}
 \def\tE{{\tens{E}}}
 \def\tF{{\tens{F}}}
 \def\tG{{\tens{G}}}
 \def\tH{{\tens{H}}}
 \def\tI{{\tens{I}}}
 \def\tJ{{\tens{J}}}
 \def\tK{{\tens{K}}}
 \def\tL{{\tens{L}}}
 \def\tM{{\tens{M}}}
 \def\tN{{\tens{N}}}
 \def\tO{{\tens{O}}}
 \def\tP{{\tens{P}}}
 \def\tQ{{\tens{Q}}}
 \def\tR{{\tens{R}}}
 \def\tS{{\tens{S}}}
 \def\tT{{\tens{T}}}
 \def\tU{{\tens{U}}}
 \def\tV{{\tens{V}}}
 \def\tW{{\tens{W}}}
 \def\tX{{\tens{X}}}
 \def\tY{{\tens{Y}}}
 \def\tZ{{\tens{Z}}}
 % Graph
 \def\gA{{\mathcal{A}}}
 \def\gB{{\mathcal{B}}}
 \def\gC{{\mathcal{C}}}
 \def\gD{{\mathcal{D}}}
 \def\gE{{\mathcal{E}}}
 \def\gF{{\mathcal{F}}}
 \def\gG{{\mathcal{G}}}
 \def\gH{{\mathcal{H}}}
 \def\gI{{\mathcal{I}}}
 \def\gJ{{\mathcal{J}}}
 \def\gK{{\mathcal{K}}}
 \def\gL{{\mathcal{L}}}
 \def\gM{{\mathcal{M}}}
 \def\gN{{\mathcal{N}}}
 \def\gO{{\mathcal{O}}}
 \def\gP{{\mathcal{P}}}
 \def\gQ{{\mathcal{Q}}}
 \def\gR{{\mathcal{R}}}
 \def\gS{{\mathcal{S}}}
 \def\gT{{\mathcal{T}}}
 \def\gU{{\mathcal{U}}}
 \def\gV{{\mathcal{V}}}
 \def\gW{{\mathcal{W}}}
 \def\gX{{\mathcal{X}}}
 \def\gY{{\mathcal{Y}}}
 \def\gZ{{\mathcal{Z}}}
 % Sets
 \def\sA{{\mathbb{A}}}
 \def\sB{{\mathbb{B}}}
 \def\sC{{\mathbb{C}}}
 \def\sD{{\mathbb{D}}}
 % Don't use a set called E, because this would be the same as our symbol
 % for expectation.
 \def\sF{{\mathbb{F}}}
 \def\sG{{\mathbb{G}}}
 \def\sH{{\mathbb{H}}}
 \def\sI{{\mathbb{I}}}
 \def\sJ{{\mathbb{J}}}
 \def\sK{{\mathbb{K}}}
 \def\sL{{\mathbb{L}}}
 \def\sM{{\mathbb{M}}}
 \def\sN{{\mathbb{N}}}
 \def\sO{{\mathbb{O}}}
 \def\sP{{\mathbb{P}}}
 \def\sQ{{\mathbb{Q}}}
 \def\sR{{\mathbb{R}}}
 \def\sS{{\mathbb{S}}}
 \def\sT{{\mathbb{T}}}
 \def\sU{{\mathbb{U}}}
 \def\sV{{\mathbb{V}}}
 \def\sW{{\mathbb{W}}}
 \def\sX{{\mathbb{X}}}
 \def\sY{{\mathbb{Y}}}
 \def\sZ{{\mathbb{Z}}}
 % Entries of a matrix
 \def\emLambda{{\Lambda}}
 \def\emA{{A}}
 \def\emB{{B}}
 \def\emC{{C}}
 \def\emD{{D}}
 \def\emE{{E}}
 \def\emF{{F}}
 \def\emG{{G}}
 \def\emH{{H}}
 \def\emI{{I}}
 \def\emJ{{J}}
 \def\emK{{K}}
 \def\emL{{L}}
 \def\emM{{M}}
 \def\emN{{N}}
 \def\emO{{O}}
 \def\emP{{P}}
 \def\emQ{{Q}}
 \def\emR{{R}}
 \def\emS{{S}}
 \def\emT{{T}}
 \def\emU{{U}}
 \def\emV{{V}}
 \def\emW{{W}}
 \def\emX{{X}}
 \def\emY{{Y}}
 \def\emZ{{Z}}
 \def\emSigma{{\Sigma}}
 % entries of a tensor
 % Same font as tensor, without \bm wrapper
 \newcommand{\etens}[1]{\mathsfit{#1}}
 \def\etLambda{{\etens{\Lambda}}}
 \def\etA{{\etens{A}}}
 \def\etB{{\etens{B}}}
 \def\etC{{\etens{C}}}
 \def\etD{{\etens{D}}}
 \def\etE{{\etens{E}}}
 \def\etF{{\etens{F}}}
 \def\etG{{\etens{G}}}
 \def\etH{{\etens{H}}}
 \def\etI{{\etens{I}}}
 \def\etJ{{\etens{J}}}
 \def\etK{{\etens{K}}}
 \def\etL{{\etens{L}}}
 \def\etM{{\etens{M}}}
 \def\etN{{\etens{N}}}
 \def\etO{{\etens{O}}}
 \def\etP{{\etens{P}}}
 \def\etQ{{\etens{Q}}}
 \def\etR{{\etens{R}}}
 \def\etS{{\etens{S}}}
 \def\etT{{\etens{T}}}
 \def\etU{{\etens{U}}}
 \def\etV{{\etens{V}}}
 \def\etW{{\etens{W}}}
 \def\etX{{\etens{X}}}
 \def\etY{{\etens{Y}}}
 \def\etZ{{\etens{Z}}}
 % Distribution symbols. Subscripts use \mathrm{...} rather than the obsolete
 % \rm switch: \rm takes no argument (the braces only formed a group) and is
 % not defined by every document class.
 % The true underlying data generating distribution
 \newcommand{\pdata}{p_{\mathrm{data}}}
 % The empirical distribution defined by the training set
 \newcommand{\ptrain}{\hat{p}_{\mathrm{data}}}
 \newcommand{\Ptrain}{\hat{P}_{\mathrm{data}}}
 % The model distribution
 \newcommand{\pmodel}{p_{\mathrm{model}}}
 \newcommand{\Pmodel}{P_{\mathrm{model}}}
 \newcommand{\ptildemodel}{\tilde{p}_{\mathrm{model}}}
 % Stochastic autoencoder distributions
 \newcommand{\pencode}{p_{\mathrm{encoder}}}
 \newcommand{\pdecode}{p_{\mathrm{decoder}}}
 \newcommand{\precons}{p_{\mathrm{reconstruct}}}
 \newcommand{\laplace}{\mathrm{Laplace}} % Laplace distribution
 \newcommand{\E}{\mathbb{E}}
 \newcommand{\Ls}{\mathcal{L}}
 \newcommand{\R}{\mathbb{R}}
 \newcommand{\emp}{\tilde{p}}
 \newcommand{\lr}{\alpha}
 \newcommand{\reg}{\lambda}
 \newcommand{\rect}{\mathrm{rectifier}}
 \newcommand{\softmax}{\mathrm{softmax}}
 \newcommand{\sigmoid}{\sigma}
 \newcommand{\softplus}{\zeta}
 \newcommand{\KL}{D_{\mathrm{KL}}}
 \newcommand{\Var}{\mathrm{Var}}
 \newcommand{\standarderror}{\mathrm{SE}}
 \newcommand{\Cov}{\mathrm{Cov}}
 % Wolfram Mathworld says $L^2$ is for function spaces and $\ell^2$ is for vectors
 % But then they seem to use $L^2$ for vectors throughout the site, and so does
 % wikipedia.
 \newcommand{\normlzero}{L^0}
 \newcommand{\normlone}{L^1}
 \newcommand{\normltwo}{L^2}
 \newcommand{\normlp}{L^p}
 \newcommand{\normmax}{L^\infty}
 \newcommand{\parents}{Pa} % See usage in notation.tex. Chosen to match Daphne's book.
 \DeclareMathOperator*{\argmax}{arg\,max}
 \DeclareMathOperator*{\argmin}{arg\,min}
 \DeclareMathOperator{\sign}{sign}
 \DeclareMathOperator{\Tr}{Tr}
 \let\ab\allowbreak
--- a/Show more
+++ b/Show more
		`@ -0,0 +1,3 @@`
							`# Template`

							`Template and style files for CoLM 2025`