Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews. Add AGENTS and SOUL.md support. Add Exec Approval.
2026-02-12 10:05:08 -08:00 · 2026-02-12 10:05:08 -08:00 · f5be6177b2
commit f5be6177b2
parent 89c6f24d48
18 changed files with 1200 additions and 21 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -99,6 +99,24 @@ DEFAULT_CONFIG = {
        "personality": "kawaii",
    },
    
+    # Text-to-speech configuration
+    "tts": {
+        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai"
+        "edge": {
+            "voice": "en-US-AriaNeural",
+            # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
+        },
+        "elevenlabs": {
+            "voice_id": "pNInz6obpgDQGcFmaJgB",  # Adam
+            "model_id": "eleven_multilingual_v2",
+        },
+        "openai": {
+            "model": "gpt-4o-mini-tts",
+            "voice": "alloy",
+            # Voices: alloy, echo, fable, onyx, nova, shimmer
+        },
+    },
+    
    # Permanently allowed dangerous command patterns (added via "always" approval)
    "command_allowlist": [],
    
@@ -202,6 +220,13 @@ OPTIONAL_ENV_VARS = {
        "url": None,
        "password": False,
    },
+    # Text-to-speech (premium providers)
+    "ELEVENLABS_API_KEY": {
+        "description": "ElevenLabs API key for premium text-to-speech voices",
+        "prompt": "ElevenLabs API key",
+        "url": "https://elevenlabs.io/",
+        "password": True,
+    },
    # Terminal configuration
    "MESSAGING_CWD": {
        "description": "Working directory for terminal commands via messaging (Telegram/Discord/etc). CLI always uses current directory.",
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -186,6 +186,11 @@ def _print_setup_summary(config: dict, hermes_home):
    else:
        tool_status.append(("Image Generation", False, "FAL_KEY"))
    
+    # TTS (always available via Edge TTS; ElevenLabs/OpenAI are optional)
+    tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
+    if get_env_value('ELEVENLABS_API_KEY'):
+        tool_status.append(("Text-to-Speech (ElevenLabs)", True, None))
+    
    # Tinker + WandB (RL training)
    if get_env_value('TINKER_API_KEY') and get_env_value('WANDB_API_KEY'):
        tool_status.append(("RL Training (Tinker)", True, None))
@@ -991,6 +996,28 @@ def run_setup_wizard(args):
                print_success("    Configured ✓")
    print()
    
+    # ElevenLabs - Premium TTS
+    print_info("─" * 50)
+    print(color("  Text-to-Speech - ElevenLabs (Premium)", Colors.CYAN))
+    print_info("  Enables: Premium TTS voices (Edge TTS is free and works without a key)")
+    print_info("  Use case: High-quality, customizable voice synthesis")
+    if get_env_value('ELEVENLABS_API_KEY'):
+        print_success("  Status: Configured ✓")
+        if prompt_yes_no("  Update ElevenLabs API key?", False):
+            api_key = prompt("    API key", password=True)
+            if api_key:
+                save_env_value("ELEVENLABS_API_KEY", api_key)
+                print_success("    Updated")
+    else:
+        print_warning("  Status: Not configured (free Edge TTS will be used by default)")
+        if prompt_yes_no("  Set up ElevenLabs?", False):
+            print_info("    Get your API key at: https://elevenlabs.io/")
+            api_key = prompt("    API key", password=True)
+            if api_key:
+                save_env_value("ELEVENLABS_API_KEY", api_key)
+                print_success("    Configured ✓")
+    print()
+    
    # Tinker + WandB - RL Training
    print_info("─" * 50)
    print(color("  RL Training (Tinker + WandB)", Colors.CYAN))
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -76,6 +76,7 @@ def show_status(args):
        "FAL": "FAL_KEY",
        "Tinker": "TINKER_API_KEY",
        "WandB": "WANDB_API_KEY",
+        "ElevenLabs": "ELEVENLABS_API_KEY",
    }
    
    for name, env_var in keys.items():