fix: voice mode TTS playback and keybinding issues

- Change record key from c-@ to c-r (Ctrl+R) for macOS compatibility
- Add missing tempfile and time imports (their absence caused a silent TTS crash)
- Use MP3 output for CLI TTS playback (afplay doesn't handle OGG well)
- Strip markdown formatting from text before sending to TTS
- Remove duplicate transcript echo in voice pipeline
This commit is contained in:
0xbyt4 2026-03-03 17:45:11 +03:00
parent ec32e9a540
commit ea5b89825a

35
cli.py
View file

@ -18,6 +18,8 @@ import shutil
import sys import sys
import json import json
import atexit import atexit
import tempfile
import time
import uuid import uuid
import textwrap import textwrap
from contextlib import contextmanager from contextlib import contextmanager
@ -3601,14 +3603,37 @@ class HermesCLI:
from tools.tts_tool import text_to_speech_tool from tools.tts_tool import text_to_speech_tool
from tools.voice_mode import play_audio_file from tools.voice_mode import play_audio_file
import json import json
import re
# Truncate to TTS limit # Strip markdown formatting for cleaner TTS
tts_text = text[:4000] if len(text) > 4000 else text tts_text = text[:4000] if len(text) > 4000 else text
result_json = text_to_speech_tool(text=tts_text) tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold
result = json.loads(result_json) tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic
tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # code
tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers
tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list items
if result.get("success") and result.get("file_path"): # Use MP3 output for CLI playback (afplay doesn't handle OGG well).
play_audio_file(result["file_path"]) # The TTS tool may auto-convert MP3->OGG, but the original MP3 remains.
os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
mp3_path = os.path.join(
tempfile.gettempdir(), "hermes_voice",
f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
)
text_to_speech_tool(text=tts_text, output_path=mp3_path)
# Play the MP3 directly (the TTS tool returns OGG path but MP3 still exists)
if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
play_audio_file(mp3_path)
# Clean up
try:
os.unlink(mp3_path)
ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
if os.path.isfile(ogg_path):
os.unlink(ogg_path)
except OSError:
pass
except Exception as e: except Exception as e:
logger.debug("Voice TTS playback failed: %s", e) logger.debug("Voice TTS playback failed: %s", e)